diff options
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/parse.go')
-rw-r--r-- | vendor/github.com/BurntSushi/toml/parse.go | 585 |
1 files changed, 366 insertions, 219 deletions
diff --git a/vendor/github.com/BurntSushi/toml/parse.go b/vendor/github.com/BurntSushi/toml/parse.go index 50869ef92..d9ae5db94 100644 --- a/vendor/github.com/BurntSushi/toml/parse.go +++ b/vendor/github.com/BurntSushi/toml/parse.go @@ -1,12 +1,14 @@ package toml import ( + "errors" "fmt" "strconv" "strings" "time" - "unicode" "unicode/utf8" + + "github.com/BurntSushi/toml/internal" ) type parser struct { @@ -14,39 +16,54 @@ type parser struct { types map[string]tomlType lx *lexer - // A list of keys in the order that they appear in the TOML data. - ordered []Key - - // the full key for the current hash in scope - context Key - - // the base key name for everything except hashes - currentKey string - - // rough approximation of line number - approxLine int - - // A map of 'key.group.names' to whether they were created implicitly. - implicits map[string]bool + ordered []Key // List of keys in the order that they appear in the TOML data. + context Key // Full key for the current hash in scope. + currentKey string // Base key name for everything except hashes. + approxLine int // Rough approximation of line number + implicits map[string]bool // Record implied keys (e.g. 'key.group.names'). } -type parseError string +// ParseError is used when a file can't be parsed: for example invalid integer +// literals, duplicate keys, etc. +type ParseError struct { + Message string + Line int + LastKey string +} -func (pe parseError) Error() string { - return string(pe) +func (pe ParseError) Error() string { + return fmt.Sprintf("Near line %d (last key parsed '%s'): %s", + pe.Line, pe.LastKey, pe.Message) } func parse(data string) (p *parser, err error) { defer func() { if r := recover(); r != nil { var ok bool - if err, ok = r.(parseError); ok { + if err, ok = r.(ParseError); ok { return } panic(r) } }() + // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString() + // which mangles stuff. + if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { + data = data[2:] + } + + // Examine first few bytes for NULL bytes; this probably means it's a UTF-16 + // file (second byte in surrogate pair being NULL). Again, do this here to + // avoid having to deal with UTF-8/16 stuff in the lexer. + ex := 6 + if len(data) < 6 { + ex = len(data) + } + if strings.ContainsRune(data[:ex], 0) { + return nil, errors.New("files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8") + } + p = &parser{ mapping: make(map[string]interface{}), types: make(map[string]tomlType), @@ -66,13 +83,17 @@ func parse(data string) (p *parser, err error) { } func (p *parser) panicf(format string, v ...interface{}) { - msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s", - p.approxLine, p.current(), fmt.Sprintf(format, v...)) - panic(parseError(msg)) + msg := fmt.Sprintf(format, v...) + panic(ParseError{ + Message: msg, + Line: p.approxLine, + LastKey: p.current(), + }) } func (p *parser) next() item { it := p.lx.nextItem() + //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.line, it.val) if it.typ == itemError { p.panicf("%s", it.val) } @@ -97,44 +118,63 @@ func (p *parser) assertEqual(expected, got itemType) { func (p *parser) topLevel(item item) { switch item.typ { - case itemCommentStart: + case itemCommentStart: // # .. p.approxLine = item.line p.expect(itemText) - case itemTableStart: - kg := p.next() - p.approxLine = kg.line + case itemTableStart: // [ .. ] + name := p.next() + p.approxLine = name.line var key Key - for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() { - key = append(key, p.keyString(kg)) + for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() { + key = append(key, p.keyString(name)) } - p.assertEqual(itemTableEnd, kg.typ) + p.assertEqual(itemTableEnd, name.typ) - p.establishContext(key, false) + p.addContext(key, false) p.setType("", tomlHash) p.ordered = append(p.ordered, key) - case itemArrayTableStart: - kg := p.next() - p.approxLine = kg.line + case itemArrayTableStart: // [[ .. ]] + name := p.next() + p.approxLine = name.line var key Key - for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() { - key = append(key, p.keyString(kg)) + for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() { + key = append(key, p.keyString(name)) } - p.assertEqual(itemArrayTableEnd, kg.typ) + p.assertEqual(itemArrayTableEnd, name.typ) - p.establishContext(key, true) + p.addContext(key, true) p.setType("", tomlArrayHash) p.ordered = append(p.ordered, key) - case itemKeyStart: - kname := p.next() - p.approxLine = kname.line - p.currentKey = p.keyString(kname) - - val, typ := p.value(p.next()) - p.setValue(p.currentKey, val) - p.setType(p.currentKey, typ) + case itemKeyStart: // key = .. + outerContext := p.context + /// Read all the key parts (e.g. 'a' and 'b' in 'a.b') + k := p.next() + p.approxLine = k.line + var key Key + for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { + key = append(key, p.keyString(k)) + } + p.assertEqual(itemKeyEnd, k.typ) + + /// The current key is the last part. + p.currentKey = key[len(key)-1] + + /// All the other parts (if any) are the context; need to set each part + /// as implicit. + context := key[:len(key)-1] + for i := range context { + p.addImplicitContext(append(p.context, context[i:i+1]...)) + } + + /// Set value. + val, typ := p.value(p.next(), false) + p.set(p.currentKey, val, typ) p.ordered = append(p.ordered, p.context.add(p.currentKey)) + + /// Remove the context we added (preserving any context from [tbl] lines). + p.context = outerContext p.currentKey = "" default: p.bug("Unexpected type at top level: %s", item.typ) @@ -148,180 +188,253 @@ func (p *parser) keyString(it item) string { return it.val case itemString, itemMultilineString, itemRawString, itemRawMultilineString: - s, _ := p.value(it) + s, _ := p.value(it, false) return s.(string) default: p.bug("Unexpected key type: %s", it.typ) - panic("unreachable") } + panic("unreachable") } +var datetimeRepl = strings.NewReplacer( + "z", "Z", + "t", "T", + " ", "T") + // value translates an expected value from the lexer into a Go value wrapped // as an empty interface. -func (p *parser) value(it item) (interface{}, tomlType) { +func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) { switch it.typ { case itemString: return p.replaceEscapes(it.val), p.typeOfPrimitive(it) case itemMultilineString: - trimmed := stripFirstNewline(stripEscapedWhitespace(it.val)) - return p.replaceEscapes(trimmed), p.typeOfPrimitive(it) + return p.replaceEscapes(stripFirstNewline(stripEscapedNewlines(it.val))), p.typeOfPrimitive(it) case itemRawString: return it.val, p.typeOfPrimitive(it) case itemRawMultilineString: return stripFirstNewline(it.val), p.typeOfPrimitive(it) + case itemInteger: + return p.valueInteger(it) + case itemFloat: + return p.valueFloat(it) case itemBool: switch it.val { case "true": return true, p.typeOfPrimitive(it) case "false": return false, p.typeOfPrimitive(it) + default: + p.bug("Expected boolean value, but got '%s'.", it.val) } - p.bug("Expected boolean value, but got '%s'.", it.val) - case itemInteger: - if !numUnderscoresOK(it.val) { - p.panicf("Invalid integer %q: underscores must be surrounded by digits", - it.val) - } - val := strings.Replace(it.val, "_", "", -1) - num, err := strconv.ParseInt(val, 10, 64) - if err != nil { - // Distinguish integer values. Normally, it'd be a bug if the lexer - // provides an invalid integer, but it's possible that the number is - // out of range of valid values (which the lexer cannot determine). - // So mark the former as a bug but the latter as a legitimate user - // error. - if e, ok := err.(*strconv.NumError); ok && - e.Err == strconv.ErrRange { - - p.panicf("Integer '%s' is out of the range of 64-bit "+ - "signed integers.", it.val) - } else { - p.bug("Expected integer value, but got '%s'.", it.val) - } + case itemDatetime: + return p.valueDatetime(it) + case itemArray: + return p.valueArray(it) + case itemInlineTableStart: + return p.valueInlineTable(it, parentIsArray) + default: + p.bug("Unexpected value type: %s", it.typ) + } + panic("unreachable") +} + +func (p *parser) valueInteger(it item) (interface{}, tomlType) { + if !numUnderscoresOK(it.val) { + p.panicf("Invalid integer %q: underscores must be surrounded by digits", it.val) + } + if numHasLeadingZero(it.val) { + p.panicf("Invalid integer %q: cannot have leading zeroes", it.val) + } + + num, err := strconv.ParseInt(it.val, 0, 64) + if err != nil { + // Distinguish integer values. Normally, it'd be a bug if the lexer + // provides an invalid integer, but it's possible that the number is + // out of range of valid values (which the lexer cannot determine). + // So mark the former as a bug but the latter as a legitimate user + // error. + if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { + p.panicf("Integer '%s' is out of the range of 64-bit signed integers.", it.val) + } else { + p.bug("Expected integer value, but got '%s'.", it.val) } - return num, p.typeOfPrimitive(it) - case itemFloat: - parts := strings.FieldsFunc(it.val, func(r rune) bool { - switch r { - case '.', 'e', 'E': - return true - } - return false - }) - for _, part := range parts { - if !numUnderscoresOK(part) { - p.panicf("Invalid float %q: underscores must be "+ - "surrounded by digits", it.val) - } + } + return num, p.typeOfPrimitive(it) +} + +func (p *parser) valueFloat(it item) (interface{}, tomlType) { + parts := strings.FieldsFunc(it.val, func(r rune) bool { + switch r { + case '.', 'e', 'E': + return true } - if !numPeriodsOK(it.val) { - // As a special case, numbers like '123.' or '1.e2', - // which are valid as far as Go/strconv are concerned, - // must be rejected because TOML says that a fractional - // part consists of '.' followed by 1+ digits. - p.panicf("Invalid float %q: '.' must be followed "+ - "by one or more digits", it.val) - } - val := strings.Replace(it.val, "_", "", -1) - num, err := strconv.ParseFloat(val, 64) - if err != nil { - if e, ok := err.(*strconv.NumError); ok && - e.Err == strconv.ErrRange { - - p.panicf("Float '%s' is out of the range of 64-bit "+ - "IEEE-754 floating-point numbers.", it.val) - } else { - p.panicf("Invalid float value: %q", it.val) - } + return false + }) + for _, part := range parts { + if !numUnderscoresOK(part) { + p.panicf("Invalid float %q: underscores must be surrounded by digits", it.val) } - return num, p.typeOfPrimitive(it) - case itemDatetime: - var t time.Time - var ok bool - var err error - for _, format := range []string{ - "2006-01-02T15:04:05Z07:00", - "2006-01-02T15:04:05", - "2006-01-02", - } { - t, err = time.ParseInLocation(format, it.val, time.Local) - if err == nil { - ok = true - break - } + } + if len(parts) > 0 && numHasLeadingZero(parts[0]) { + p.panicf("Invalid float %q: cannot have leading zeroes", it.val) + } + if !numPeriodsOK(it.val) { + // As a special case, numbers like '123.' or '1.e2', + // which are valid as far as Go/strconv are concerned, + // must be rejected because TOML says that a fractional + // part consists of '.' followed by 1+ digits. + p.panicf("Invalid float %q: '.' must be followed by one or more digits", it.val) + } + val := strings.Replace(it.val, "_", "", -1) + if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does. + val = "nan" + } + num, err := strconv.ParseFloat(val, 64) + if err != nil { + if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { + p.panicf("Float '%s' is out of the range of 64-bit IEEE-754 floating-point numbers.", it.val) + } else { + p.panicf("Invalid float value: %q", it.val) } - if !ok { - p.panicf("Invalid TOML Datetime: %q.", it.val) + } + return num, p.typeOfPrimitive(it) +} + +var dtTypes = []struct { + fmt string + zone *time.Location +}{ + {time.RFC3339Nano, time.Local}, + {"2006-01-02T15:04:05.999999999", internal.LocalDatetime}, + {"2006-01-02", internal.LocalDate}, + {"15:04:05.999999999", internal.LocalTime}, +} + +func (p *parser) valueDatetime(it item) (interface{}, tomlType) { + it.val = datetimeRepl.Replace(it.val) + var ( + t time.Time + ok bool + err error + ) + for _, dt := range dtTypes { + t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone) + if err == nil { + ok = true + break } - return t, p.typeOfPrimitive(it) - case itemArray: - array := make([]interface{}, 0) - types := make([]tomlType, 0) + } + if !ok { + p.panicf("Invalid TOML Datetime: %q.", it.val) + } + return t, p.typeOfPrimitive(it) +} - for it = p.next(); it.typ != itemArrayEnd; it = p.next() { - if it.typ == itemCommentStart { - p.expect(itemText) - continue - } +func (p *parser) valueArray(it item) (interface{}, tomlType) { + p.setType(p.currentKey, tomlArray) + + // p.setType(p.currentKey, typ) + var ( + array []interface{} + types []tomlType + ) + for it = p.next(); it.typ != itemArrayEnd; it = p.next() { + if it.typ == itemCommentStart { + p.expect(itemText) + continue + } + + val, typ := p.value(it, true) + array = append(array, val) + types = append(types, typ) + } + return array, tomlArray +} + +func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) { + var ( + hash = make(map[string]interface{}) + outerContext = p.context + outerKey = p.currentKey + ) + + p.context = append(p.context, p.currentKey) + prevContext := p.context + p.currentKey = "" + + p.addImplicit(p.context) + p.addContext(p.context, parentIsArray) - val, typ := p.value(it) - array = append(array, val) - types = append(types, typ) + /// Loop over all table key/value pairs. + for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { + if it.typ == itemCommentStart { + p.expect(itemText) + continue } - return array, p.typeOfArray(types) - case itemInlineTableStart: - var ( - hash = make(map[string]interface{}) - outerContext = p.context - outerKey = p.currentKey - ) - p.context = append(p.context, p.currentKey) - p.currentKey = "" - for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { - if it.typ != itemKeyStart { - p.bug("Expected key start but instead found %q, around line %d", - it.val, p.approxLine) - } - if it.typ == itemCommentStart { - p.expect(itemText) - continue - } + /// Read all key parts. + k := p.next() + p.approxLine = k.line + var key Key + for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { + key = append(key, p.keyString(k)) + } + p.assertEqual(itemKeyEnd, k.typ) - // retrieve key - k := p.next() - p.approxLine = k.line - kname := p.keyString(k) + /// The current key is the last part. + p.currentKey = key[len(key)-1] - // retrieve value - p.currentKey = kname - val, typ := p.value(p.next()) - // make sure we keep metadata up to date - p.setType(kname, typ) - p.ordered = append(p.ordered, p.context.add(p.currentKey)) - hash[kname] = val + /// All the other parts (if any) are the context; need to set each part + /// as implicit. + context := key[:len(key)-1] + for i := range context { + p.addImplicitContext(append(p.context, context[i:i+1]...)) } - p.context = outerContext - p.currentKey = outerKey - return hash, tomlHash + + /// Set the value. + val, typ := p.value(p.next(), false) + p.set(p.currentKey, val, typ) + p.ordered = append(p.ordered, p.context.add(p.currentKey)) + hash[p.currentKey] = val + + /// Restore context. + p.context = prevContext } - p.bug("Unexpected value type: %s", it.typ) - panic("unreachable") + p.context = outerContext + p.currentKey = outerKey + return hash, tomlHash +} + +// numHasLeadingZero checks if this number has leading zeroes, allowing for '0', +// +/- signs, and base prefixes. +func numHasLeadingZero(s string) bool { + if len(s) > 1 && s[0] == '0' && isDigit(rune(s[1])) { // >1 to allow "0" and isDigit to allow 0x + return true + } + if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' { + return true + } + return false } // numUnderscoresOK checks whether each underscore in s is surrounded by // characters that are not underscores. func numUnderscoresOK(s string) bool { + switch s { + case "nan", "+nan", "-nan", "inf", "-inf", "+inf": + return true + } accept := false for _, r := range s { if r == '_' { if !accept { return false } - accept = false - continue } - accept = true + + // isHexadecimal is a superset of all the permissable characters + // surrounding an underscore. + accept = isHexadecimal(r) } return accept } @@ -338,13 +451,12 @@ func numPeriodsOK(s string) bool { return !period } -// establishContext sets the current context of the parser, -// where the context is either a hash or an array of hashes. Which one is -// set depends on the value of the `array` parameter. +// Set the current context of the parser, where the context is either a hash or +// an array of hashes, depending on the value of the `array` parameter. // // Establishing the context also makes sure that the key isn't a duplicate, and // will create implicit hashes automatically. -func (p *parser) establishContext(key Key, array bool) { +func (p *parser) addContext(key Key, array bool) { var ok bool // Always start at the top level and drill down for our context. @@ -383,7 +495,7 @@ func (p *parser) establishContext(key Key, array bool) { // list of tables for it. k := key[len(key)-1] if _, ok := hashContext[k]; !ok { - hashContext[k] = make([]map[string]interface{}, 0, 5) + hashContext[k] = make([]map[string]interface{}, 0, 4) } // Add a new table. But make sure the key hasn't already been used @@ -391,8 +503,7 @@ func (p *parser) establishContext(key Key, array bool) { if hash, ok := hashContext[k].([]map[string]interface{}); ok { hashContext[k] = append(hash, make(map[string]interface{})) } else { - p.panicf("Key '%s' was already created and cannot be used as "+ - "an array.", keyContext) + p.panicf("Key '%s' was already created and cannot be used as an array.", keyContext) } } else { p.setValue(key[len(key)-1], make(map[string]interface{})) @@ -400,15 +511,22 @@ func (p *parser) establishContext(key Key, array bool) { p.context = append(p.context, key[len(key)-1]) } +// set calls setValue and setType. +func (p *parser) set(key string, val interface{}, typ tomlType) { + p.setValue(p.currentKey, val) + p.setType(p.currentKey, typ) +} + // setValue sets the given key to the given value in the current context. // It will make sure that the key hasn't already been defined, account for // implicit key groups. func (p *parser) setValue(key string, value interface{}) { - var tmpHash interface{} - var ok bool - - hash := p.mapping - keyContext := make(Key, 0) + var ( + tmpHash interface{} + ok bool + hash = p.mapping + keyContext Key + ) for _, k := range p.context { keyContext = append(keyContext, k) if tmpHash, ok = hash[k]; !ok { @@ -422,24 +540,26 @@ func (p *parser) setValue(key string, value interface{}) { case map[string]interface{}: hash = t default: - p.bug("Expected hash to have type 'map[string]interface{}', but "+ - "it has '%T' instead.", tmpHash) + p.panicf("Key '%s' has already been defined.", keyContext) } } keyContext = append(keyContext, key) if _, ok := hash[key]; ok { - // Typically, if the given key has already been set, then we have - // to raise an error since duplicate keys are disallowed. However, - // it's possible that a key was previously defined implicitly. In this - // case, it is allowed to be redefined concretely. (See the - // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.) + // Normally redefining keys isn't allowed, but the key could have been + // defined implicitly and it's allowed to be redefined concretely. (See + // the `valid/implicit-and-explicit-after.toml` in toml-test) // // But we have to make sure to stop marking it as an implicit. (So that // another redefinition provokes an error.) // // Note that since it has already been defined (as a hash), we don't // want to overwrite it. So our business is done. + if p.isArray(keyContext) { + p.removeImplicit(keyContext) + hash[key] = value + return + } if p.isImplicit(keyContext) { p.removeImplicit(keyContext) return @@ -449,6 +569,7 @@ func (p *parser) setValue(key string, value interface{}) { // key, which is *always* wrong. p.panicf("Key '%s' has already been defined.", keyContext) } + hash[key] = value } @@ -468,21 +589,15 @@ func (p *parser) setType(key string, typ tomlType) { p.types[keyContext.String()] = typ } -// addImplicit sets the given Key as having been created implicitly. -func (p *parser) addImplicit(key Key) { - p.implicits[key.String()] = true -} - -// removeImplicit stops tagging the given key as having been implicitly -// created. -func (p *parser) removeImplicit(key Key) { - p.implicits[key.String()] = false -} - -// isImplicit returns true if the key group pointed to by the key was created -// implicitly. -func (p *parser) isImplicit(key Key) bool { - return p.implicits[key.String()] +// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and +// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly). +func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = true } +func (p *parser) removeImplicit(key Key) { p.implicits[key.String()] = false } +func (p *parser) isImplicit(key Key) bool { return p.implicits[key.String()] } +func (p *parser) isArray(key Key) bool { return p.types[key.String()] == tomlArray } +func (p *parser) addImplicitContext(key Key) { + p.addImplicit(key) + p.addContext(key, false) } // current returns the full key name of the current context. @@ -497,20 +612,54 @@ func (p *parser) current() string { } func stripFirstNewline(s string) string { - if len(s) == 0 || s[0] != '\n' { - return s + if len(s) > 0 && s[0] == '\n' { + return s[1:] + } + if len(s) > 1 && s[0] == '\r' && s[1] == '\n' { + return s[2:] } - return s[1:] + return s } -func stripEscapedWhitespace(s string) string { - esc := strings.Split(s, "\\\n") - if len(esc) > 1 { - for i := 1; i < len(esc); i++ { - esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace) +// Remove newlines inside triple-quoted strings if a line ends with "\". +func stripEscapedNewlines(s string) string { + split := strings.Split(s, "\n") + if len(split) < 1 { + return s + } + + escNL := false // Keep track of the last non-blank line was escaped. + for i, line := range split { + line = strings.TrimRight(line, " \t\r") + + if len(line) == 0 || line[len(line)-1] != '\\' { + split[i] = strings.TrimRight(split[i], "\r") + if !escNL && i != len(split)-1 { + split[i] += "\n" + } + continue + } + + escBS := true + for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- { + escBS = !escBS + } + if escNL { + line = strings.TrimLeft(line, " \t\r") + } + escNL = !escBS + + if escBS { + split[i] += "\n" + continue + } + + split[i] = line[:len(line)-1] // Remove \ + if len(split)-1 > i { + split[i+1] = strings.TrimLeft(split[i+1], " \t\r") } } - return strings.Join(esc, "") + return strings.Join(split, "") } func (p *parser) replaceEscapes(str string) string { @@ -533,6 +682,9 @@ func (p *parser) replaceEscapes(str string) string { default: p.bug("Expected valid escape code after \\, but got %q.", s[r]) return "" + case ' ', '\t': + p.panicf("invalid escape: '\\%c'", s[r]) + return "" case 'b': replaced = append(replaced, rune(0x0008)) r += 1 @@ -585,8 +737,3 @@ func (p *parser) asciiEscapeToUnicode(bs []byte) rune { } return rune(hex) } - -func isStringType(ty itemType) bool { - return ty == itemString || ty == itemMultilineString || - ty == itemRawString || ty == itemRawMultilineString -} |