diff options
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r-- | vendor/github.com/BurntSushi/toml/lex.go | 352 |
1 files changed, 173 insertions, 179 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go index adc4eb5d5..63ef20f47 100644 --- a/vendor/github.com/BurntSushi/toml/lex.go +++ b/vendor/github.com/BurntSushi/toml/lex.go @@ -37,28 +37,14 @@ const ( itemInlineTableEnd ) -const ( - eof = 0 - comma = ',' - tableStart = '[' - tableEnd = ']' - arrayTableStart = '[' - arrayTableEnd = ']' - tableSep = '.' - keySep = '=' - arrayStart = '[' - arrayEnd = ']' - commentStart = '#' - stringStart = '"' - stringEnd = '"' - rawStringStart = '\'' - rawStringEnd = '\'' - inlineTableStart = '{' - inlineTableEnd = '}' -) +const eof = 0 type stateFn func(lx *lexer) stateFn +func (p Position) String() string { + return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len) +} + type lexer struct { input string start int @@ -67,26 +53,26 @@ type lexer struct { state stateFn items chan item - // Allow for backing up up to four runes. - // This is necessary because TOML contains 3-rune tokens (""" and '''). + // Allow for backing up up to 4 runes. This is necessary because TOML + // contains 3-rune tokens (""" and '''). prevWidths [4]int - nprev int // how many of prevWidths are in use - // If we emit an eof, we can still back up, but it is not OK to call - // next again. - atEOF bool + nprev int // how many of prevWidths are in use + atEOF bool // If we emit an eof, we can still back up, but it is not OK to call next again. // A stack of state functions used to maintain context. - // The idea is to reuse parts of the state machine in various places. - // For example, values can appear at the top level or within arbitrarily - // nested arrays. The last state on the stack is used after a value has - // been lexed. Similarly for comments. + // + // The idea is to reuse parts of the state machine in various places. For + // example, values can appear at the top level or within arbitrarily nested + // arrays. The last state on the stack is used after a value has been lexed. + // Similarly for comments. stack []stateFn } type item struct { - typ itemType - val string - line int + typ itemType + val string + err error + pos Position } func (lx *lexer) nextItem() item { @@ -96,7 +82,7 @@ func (lx *lexer) nextItem() item { return item default: lx.state = lx.state(lx) - //fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack) + //fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack) } } } @@ -105,9 +91,9 @@ func lex(input string) *lexer { lx := &lexer{ input: input, state: lexTop, - line: 1, items: make(chan item, 10), stack: make([]stateFn, 0, 10), + line: 1, } return lx } @@ -129,13 +115,25 @@ func (lx *lexer) current() string { return lx.input[lx.start:lx.pos] } +func (lx lexer) getPos() Position { + p := Position{ + Line: lx.line, + Start: lx.start, + Len: lx.pos - lx.start, + } + if p.Len <= 0 { + p.Len = 1 + } + return p +} + func (lx *lexer) emit(typ itemType) { - lx.items <- item{typ, lx.current(), lx.line} + lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()} lx.start = lx.pos } func (lx *lexer) emitTrim(typ itemType) { - lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line} + lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())} lx.start = lx.pos } @@ -160,7 +158,13 @@ func (lx *lexer) next() (r rune) { r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) if r == utf8.RuneError { - lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos]) + lx.error(errLexUTF8{lx.input[lx.pos]}) + return utf8.RuneError + } + + // Note: don't use peek() here, as this calls next(). + if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) { + lx.errorControlChar(r) return utf8.RuneError } @@ -188,6 +192,7 @@ func (lx *lexer) backup() { lx.prevWidths[1] = lx.prevWidths[2] lx.prevWidths[2] = lx.prevWidths[3] lx.nprev-- + lx.pos -= w if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' { lx.line-- @@ -223,18 +228,58 @@ func (lx *lexer) skip(pred func(rune) bool) { } } -// errorf stops all lexing by emitting an error and returning `nil`. +// error stops all lexing by emitting an error and returning `nil`. +// // Note that any value that is a character is escaped if it's a special // character (newlines, tabs, etc.). +func (lx *lexer) error(err error) stateFn { + if lx.atEOF { + return lx.errorPrevLine(err) + } + lx.items <- item{typ: itemError, pos: lx.getPos(), err: err} + return nil +} + +// errorfPrevline is like error(), but sets the position to the last column of +// the previous line. +// +// This is so that unexpected EOF or NL errors don't show on a new blank line. +func (lx *lexer) errorPrevLine(err error) stateFn { + pos := lx.getPos() + pos.Line-- + pos.Len = 1 + pos.Start = lx.pos - 1 + lx.items <- item{typ: itemError, pos: pos, err: err} + return nil +} + +// errorPos is like error(), but allows explicitly setting the position. +func (lx *lexer) errorPos(start, length int, err error) stateFn { + pos := lx.getPos() + pos.Start = start + pos.Len = length + lx.items <- item{typ: itemError, pos: pos, err: err} + return nil +} + +// errorf is like error, and creates a new error. func (lx *lexer) errorf(format string, values ...interface{}) stateFn { - lx.items <- item{ - itemError, - fmt.Sprintf(format, values...), - lx.line, + if lx.atEOF { + pos := lx.getPos() + pos.Line-- + pos.Len = 1 + pos.Start = lx.pos - 1 + lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)} + return nil } + lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)} return nil } +func (lx *lexer) errorControlChar(cc rune) stateFn { + return lx.errorPos(lx.pos-1, 1, errLexControl{cc}) +} + // lexTop consumes elements at the top level of TOML data. func lexTop(lx *lexer) stateFn { r := lx.next() @@ -242,10 +287,10 @@ func lexTop(lx *lexer) stateFn { return lexSkip(lx, lexTop) } switch r { - case commentStart: + case '#': lx.push(lexTop) return lexCommentStart - case tableStart: + case '[': return lexTableStart case eof: if lx.pos > lx.start { @@ -268,7 +313,7 @@ func lexTop(lx *lexer) stateFn { func lexTopEnd(lx *lexer) stateFn { r := lx.next() switch { - case r == commentStart: + case r == '#': // a comment will read to a newline for us. lx.push(lexTop) return lexCommentStart @@ -292,7 +337,7 @@ func lexTopEnd(lx *lexer) stateFn { // It also handles the case that this is an item in an array of tables. // e.g., '[[name]]'. func lexTableStart(lx *lexer) stateFn { - if lx.peek() == arrayTableStart { + if lx.peek() == '[' { lx.next() lx.emit(itemArrayTableStart) lx.push(lexArrayTableEnd) @@ -309,10 +354,8 @@ func lexTableEnd(lx *lexer) stateFn { } func lexArrayTableEnd(lx *lexer) stateFn { - if r := lx.next(); r != arrayTableEnd { - return lx.errorf( - "expected end of table array name delimiter %q, but got %q instead", - arrayTableEnd, r) + if r := lx.next(); r != ']' { + return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r) } lx.emit(itemArrayTableEnd) return lexTopEnd @@ -321,11 +364,11 @@ func lexArrayTableEnd(lx *lexer) stateFn { func lexTableNameStart(lx *lexer) stateFn { lx.skip(isWhitespace) switch r := lx.peek(); { - case r == tableEnd || r == eof: + case r == ']' || r == eof: return lx.errorf("unexpected end of table name (table names cannot be empty)") - case r == tableSep: + case r == '.': return lx.errorf("unexpected table separator (table names cannot be empty)") - case r == stringStart || r == rawStringStart: + case r == '"' || r == '\'': lx.ignore() lx.push(lexTableNameEnd) return lexQuotedName @@ -342,10 +385,10 @@ func lexTableNameEnd(lx *lexer) stateFn { switch r := lx.next(); { case isWhitespace(r): return lexTableNameEnd - case r == tableSep: + case r == '.': lx.ignore() return lexTableNameStart - case r == tableEnd: + case r == ']': return lx.pop() default: return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r) @@ -379,10 +422,10 @@ func lexQuotedName(lx *lexer) stateFn { switch { case isWhitespace(r): return lexSkip(lx, lexValue) - case r == stringStart: + case r == '"': lx.ignore() // ignore the '"' return lexString - case r == rawStringStart: + case r == '\'': lx.ignore() // ignore the "'" return lexRawString case r == eof: @@ -400,7 +443,7 @@ func lexKeyStart(lx *lexer) stateFn { return lx.errorf("unexpected '=': key name appears blank") case r == '.': return lx.errorf("unexpected '.': keys cannot start with a '.'") - case r == stringStart || r == rawStringStart: + case r == '"' || r == '\'': lx.ignore() fallthrough default: // Bare key @@ -416,7 +459,7 @@ func lexKeyNameStart(lx *lexer) stateFn { return lx.errorf("unexpected '='") case r == '.': return lx.errorf("unexpected '.'") - case r == stringStart || r == rawStringStart: + case r == '"' || r == '\'': lx.ignore() lx.push(lexKeyEnd) return lexQuotedName @@ -434,7 +477,7 @@ func lexKeyEnd(lx *lexer) stateFn { case isWhitespace(r): return lexSkip(lx, lexKeyEnd) case r == eof: - return lx.errorf("unexpected EOF; expected key separator %q", keySep) + return lx.errorf("unexpected EOF; expected key separator '='") case r == '.': lx.ignore() return lexKeyNameStart @@ -461,17 +504,17 @@ func lexValue(lx *lexer) stateFn { return lexNumberOrDateStart } switch r { - case arrayStart: + case '[': lx.ignore() lx.emit(itemArray) return lexArrayValue - case inlineTableStart: + case '{': lx.ignore() lx.emit(itemInlineTableStart) return lexInlineTableValue - case stringStart: - if lx.accept(stringStart) { - if lx.accept(stringStart) { + case '"': + if lx.accept('"') { + if lx.accept('"') { lx.ignore() // Ignore """ return lexMultilineString } @@ -479,9 +522,9 @@ func lexValue(lx *lexer) stateFn { } lx.ignore() // ignore the '"' return lexString - case rawStringStart: - if lx.accept(rawStringStart) { - if lx.accept(rawStringStart) { + case '\'': + if lx.accept('\'') { + if lx.accept('\'') { lx.ignore() // Ignore """ return lexMultilineRawString } @@ -520,14 +563,12 @@ func lexArrayValue(lx *lexer) stateFn { switch { case isWhitespace(r) || isNL(r): return lexSkip(lx, lexArrayValue) - case r == commentStart: + case r == '#': lx.push(lexArrayValue) return lexCommentStart - case r == comma: + case r == ',': return lx.errorf("unexpected comma") - case r == arrayEnd: - // NOTE(caleb): The spec isn't clear about whether you can have - // a trailing comma or not, so we'll allow it. + case r == ']': return lexArrayEnd } @@ -540,22 +581,20 @@ func lexArrayValue(lx *lexer) stateFn { // the next value (or the end of the array): it ignores whitespace and newlines // and expects either a ',' or a ']'. func lexArrayValueEnd(lx *lexer) stateFn { - r := lx.next() - switch { + switch r := lx.next(); { case isWhitespace(r) || isNL(r): return lexSkip(lx, lexArrayValueEnd) - case r == commentStart: + case r == '#': lx.push(lexArrayValueEnd) return lexCommentStart - case r == comma: + case r == ',': lx.ignore() return lexArrayValue // move on to the next value - case r == arrayEnd: + case r == ']': return lexArrayEnd + default: + return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r)) } - return lx.errorf( - "expected a comma or array terminator %q, but got %s instead", - arrayEnd, runeOrEOF(r)) } // lexArrayEnd finishes the lexing of an array. @@ -574,13 +613,13 @@ func lexInlineTableValue(lx *lexer) stateFn { case isWhitespace(r): return lexSkip(lx, lexInlineTableValue) case isNL(r): - return lx.errorf("newlines not allowed within inline tables") - case r == commentStart: + return lx.errorPrevLine(errLexInlineTableNL{}) + case r == '#': lx.push(lexInlineTableValue) return lexCommentStart - case r == comma: + case r == ',': return lx.errorf("unexpected comma") - case r == inlineTableEnd: + case r == '}': return lexInlineTableEnd } lx.backup() @@ -596,23 +635,21 @@ func lexInlineTableValueEnd(lx *lexer) stateFn { case isWhitespace(r): return lexSkip(lx, lexInlineTableValueEnd) case isNL(r): - return lx.errorf("newlines not allowed within inline tables") - case r == commentStart: + return lx.errorPrevLine(errLexInlineTableNL{}) + case r == '#': lx.push(lexInlineTableValueEnd) return lexCommentStart - case r == comma: + case r == ',': lx.ignore() lx.skip(isWhitespace) if lx.peek() == '}' { return lx.errorf("trailing comma not allowed in inline tables") } return lexInlineTableValue - case r == inlineTableEnd: + case r == '}': return lexInlineTableEnd default: - return lx.errorf( - "expected a comma or an inline table terminator %q, but got %s instead", - inlineTableEnd, runeOrEOF(r)) + return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r)) } } @@ -638,14 +675,12 @@ func lexString(lx *lexer) stateFn { switch { case r == eof: return lx.errorf(`unexpected EOF; expected '"'`) - case isControl(r) || r == '\r': - return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r) case isNL(r): - return lx.errorf("strings cannot contain newlines") + return lx.errorPrevLine(errLexStringNL{}) case r == '\\': lx.push(lexString) return lexStringEscape - case r == stringEnd: + case r == '"': lx.backup() lx.emit(itemString) lx.next() @@ -660,23 +695,20 @@ func lexString(lx *lexer) stateFn { func lexMultilineString(lx *lexer) stateFn { r := lx.next() switch r { + default: + return lexMultilineString case eof: return lx.errorf(`unexpected EOF; expected '"""'`) - case '\r': - if lx.peek() != '\n' { - return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r) - } - return lexMultilineString case '\\': return lexMultilineStringEscape - case stringEnd: + case '"': /// Found " → try to read two more "". - if lx.accept(stringEnd) { - if lx.accept(stringEnd) { + if lx.accept('"') { + if lx.accept('"') { /// Peek ahead: the string can contain " and "", including at the /// end: """str""""" /// 6 or more at the end, however, is an error. - if lx.peek() == stringEnd { + if lx.peek() == '"' { /// Check if we already lexed 5 's; if so we have 6 now, and /// that's just too many man! if strings.HasSuffix(lx.current(), `"""""`) { @@ -699,12 +731,8 @@ func lexMultilineString(lx *lexer) stateFn { } lx.backup() } + return lexMultilineString } - - if isControl(r) { - return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r) - } - return lexMultilineString } // lexRawString consumes a raw string. Nothing can be escaped in such a string. @@ -712,20 +740,19 @@ func lexMultilineString(lx *lexer) stateFn { func lexRawString(lx *lexer) stateFn { r := lx.next() switch { + default: + return lexRawString case r == eof: return lx.errorf(`unexpected EOF; expected "'"`) - case isControl(r) || r == '\r': - return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r) case isNL(r): - return lx.errorf("strings cannot contain newlines") - case r == rawStringEnd: + return lx.errorPrevLine(errLexStringNL{}) + case r == '\'': lx.backup() lx.emit(itemRawString) lx.next() lx.ignore() return lx.pop() } - return lexRawString } // lexMultilineRawString consumes a raw string. Nothing can be escaped in such @@ -734,21 +761,18 @@ func lexRawString(lx *lexer) stateFn { func lexMultilineRawString(lx *lexer) stateFn { r := lx.next() switch r { + default: + return lexMultilineRawString case eof: return lx.errorf(`unexpected EOF; expected "'''"`) - case '\r': - if lx.peek() != '\n' { - return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r) - } - return lexMultilineRawString - case rawStringEnd: + case '\'': /// Found ' → try to read two more ''. - if lx.accept(rawStringEnd) { - if lx.accept(rawStringEnd) { + if lx.accept('\'') { + if lx.accept('\'') { /// Peek ahead: the string can contain ' and '', including at the /// end: '''str''''' /// 6 or more at the end, however, is an error. - if lx.peek() == rawStringEnd { + if lx.peek() == '\'' { /// Check if we already lexed 5 's; if so we have 6 now, and /// that's just too many man! if strings.HasSuffix(lx.current(), "'''''") { @@ -771,12 +795,8 @@ func lexMultilineRawString(lx *lexer) stateFn { } lx.backup() } + return lexMultilineRawString } - - if isControl(r) { - return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r) - } - return lexMultilineRawString } // lexMultilineStringEscape consumes an escaped character. It assumes that the @@ -817,8 +837,7 @@ func lexStringEscape(lx *lexer) stateFn { case 'U': return lexLongUnicodeEscape } - return lx.errorf("invalid escape character %q; only the following escape characters are allowed: "+ - `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r) + return lx.error(errLexEscape{r}) } func lexShortUnicodeEscape(lx *lexer) stateFn { @@ -1108,8 +1127,6 @@ func lexComment(lx *lexer) stateFn { lx.backup() lx.emit(itemText) return lx.pop() - case isControl(r): - return lx.errorf("control characters are not allowed inside comments: '0x%02x'", r) default: return lexComment } @@ -1121,52 +1138,6 @@ func lexSkip(lx *lexer, nextState stateFn) stateFn { return nextState } -// isWhitespace returns true if `r` is a whitespace character according -// to the spec. -func isWhitespace(r rune) bool { - return r == '\t' || r == ' ' -} - -func isNL(r rune) bool { - return r == '\n' || r == '\r' -} - -// Control characters except \n, \t -func isControl(r rune) bool { - switch r { - case '\t', '\r', '\n': - return false - default: - return (r >= 0x00 && r <= 0x1f) || r == 0x7f - } -} - -func isDigit(r rune) bool { - return r >= '0' && r <= '9' -} - -func isHexadecimal(r rune) bool { - return (r >= '0' && r <= '9') || - (r >= 'a' && r <= 'f') || - (r >= 'A' && r <= 'F') -} - -func isOctal(r rune) bool { - return r >= '0' && r <= '7' -} - -func isBinary(r rune) bool { - return r == '0' || r == '1' -} - -func isBareKeyChar(r rune) bool { - return (r >= 'A' && r <= 'Z') || - (r >= 'a' && r <= 'z') || - (r >= '0' && r <= '9') || - r == '_' || - r == '-' -} - func (s stateFn) String() string { name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name() if i := strings.LastIndexByte(name, '.'); i > -1 { @@ -1223,3 +1194,26 @@ func (itype itemType) String() string { func (item item) String() string { return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) } + +func isWhitespace(r rune) bool { return r == '\t' || r == ' ' } +func isNL(r rune) bool { return r == '\n' || r == '\r' } +func isControl(r rune) bool { // Control characters except \t, \r, \n + switch r { + case '\t', '\r', '\n': + return false + default: + return (r >= 0x00 && r <= 0x1f) || r == 0x7f + } +} +func isDigit(r rune) bool { return r >= '0' && r <= '9' } +func isBinary(r rune) bool { return r == '0' || r == '1' } +func isOctal(r rune) bool { return r >= '0' && r <= '7' } +func isHexadecimal(r rune) bool { + return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F') +} +func isBareKeyChar(r rune) bool { + return (r >= 'A' && r <= 'Z') || + (r >= 'a' && r <= 'z') || + (r >= '0' && r <= '9') || + r == '_' || r == '-' +} |