aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/BurntSushi/toml/lex.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r--vendor/github.com/BurntSushi/toml/lex.go494
1 files changed, 288 insertions, 206 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go
index 9b20b3a81..e0a742a88 100644
--- a/vendor/github.com/BurntSushi/toml/lex.go
+++ b/vendor/github.com/BurntSushi/toml/lex.go
@@ -3,6 +3,7 @@ package toml
import (
"fmt"
"strings"
+ "unicode"
"unicode/utf8"
)
@@ -29,24 +30,28 @@ const (
itemArrayTableEnd
itemKeyStart
itemCommentStart
+ itemInlineTableStart
+ itemInlineTableEnd
)
const (
- eof = 0
- tableStart = '['
- tableEnd = ']'
- arrayTableStart = '['
- arrayTableEnd = ']'
- tableSep = '.'
- keySep = '='
- arrayStart = '['
- arrayEnd = ']'
- arrayValTerm = ','
- commentStart = '#'
- stringStart = '"'
- stringEnd = '"'
- rawStringStart = '\''
- rawStringEnd = '\''
+ eof = 0
+ comma = ','
+ tableStart = '['
+ tableEnd = ']'
+ arrayTableStart = '['
+ arrayTableEnd = ']'
+ tableSep = '.'
+ keySep = '='
+ arrayStart = '['
+ arrayEnd = ']'
+ commentStart = '#'
+ stringStart = '"'
+ stringEnd = '"'
+ rawStringStart = '\''
+ rawStringEnd = '\''
+ inlineTableStart = '{'
+ inlineTableEnd = '}'
)
type stateFn func(lx *lexer) stateFn
@@ -55,11 +60,18 @@ type lexer struct {
input string
start int
pos int
- width int
line int
state stateFn
items chan item
+ // Allow for backing up up to three runes.
+ // This is necessary because TOML contains 3-rune tokens (""" and ''').
+ prevWidths [3]int
+ nprev int // how many of prevWidths are in use
+ // If we emit an eof, we can still back up, but it is not OK to call
+ // next again.
+ atEOF bool
+
// A stack of state functions used to maintain context.
// The idea is to reuse parts of the state machine in various places.
// For example, values can appear at the top level or within arbitrarily
@@ -87,7 +99,7 @@ func (lx *lexer) nextItem() item {
func lex(input string) *lexer {
lx := &lexer{
- input: input + "\n",
+ input: input,
state: lexTop,
line: 1,
items: make(chan item, 10),
@@ -102,7 +114,7 @@ func (lx *lexer) push(state stateFn) {
func (lx *lexer) pop() stateFn {
if len(lx.stack) == 0 {
- return lx.errorf("BUG in lexer: no states to pop.")
+ return lx.errorf("BUG in lexer: no states to pop")
}
last := lx.stack[len(lx.stack)-1]
lx.stack = lx.stack[0 : len(lx.stack)-1]
@@ -124,16 +136,25 @@ func (lx *lexer) emitTrim(typ itemType) {
}
func (lx *lexer) next() (r rune) {
+ if lx.atEOF {
+ panic("next called after EOF")
+ }
if lx.pos >= len(lx.input) {
- lx.width = 0
+ lx.atEOF = true
return eof
}
if lx.input[lx.pos] == '\n' {
lx.line++
}
- r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
- lx.pos += lx.width
+ lx.prevWidths[2] = lx.prevWidths[1]
+ lx.prevWidths[1] = lx.prevWidths[0]
+ if lx.nprev < 3 {
+ lx.nprev++
+ }
+ r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
+ lx.prevWidths[0] = w
+ lx.pos += w
return r
}
@@ -142,9 +163,20 @@ func (lx *lexer) ignore() {
lx.start = lx.pos
}
-// backup steps back one rune. Can be called only once per call of next.
+// backup steps back one rune. Can be called only twice between calls to next.
func (lx *lexer) backup() {
- lx.pos -= lx.width
+ if lx.atEOF {
+ lx.atEOF = false
+ return
+ }
+ if lx.nprev < 1 {
+ panic("backed up too far")
+ }
+ w := lx.prevWidths[0]
+ lx.prevWidths[0] = lx.prevWidths[1]
+ lx.prevWidths[1] = lx.prevWidths[2]
+ lx.nprev--
+ lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
}
@@ -166,9 +198,22 @@ func (lx *lexer) peek() rune {
return r
}
+// skip ignores all input that matches the given predicate.
+func (lx *lexer) skip(pred func(rune) bool) {
+ for {
+ r := lx.next()
+ if pred(r) {
+ continue
+ }
+ lx.backup()
+ lx.ignore()
+ return
+ }
+}
+
// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
-// character (new lines, tabs, etc.).
+// character (newlines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
lx.items <- item{
itemError,
@@ -184,7 +229,6 @@ func lexTop(lx *lexer) stateFn {
if isWhitespace(r) || isNL(r) {
return lexSkip(lx, lexTop)
}
-
switch r {
case commentStart:
lx.push(lexTop)
@@ -193,7 +237,7 @@ func lexTop(lx *lexer) stateFn {
return lexTableStart
case eof:
if lx.pos > lx.start {
- return lx.errorf("Unexpected EOF.")
+ return lx.errorf("unexpected EOF")
}
lx.emit(itemEOF)
return nil
@@ -208,12 +252,12 @@ func lexTop(lx *lexer) stateFn {
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
-// upon a new line. If it sees EOF, it will quit the lexer successfully.
+// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case r == commentStart:
- // a comment will read to a new line for us.
+ // a comment will read to a newline for us.
lx.push(lexTop)
return lexCommentStart
case isWhitespace(r):
@@ -222,11 +266,11 @@ func lexTopEnd(lx *lexer) stateFn {
lx.ignore()
return lexTop
case r == eof:
- lx.ignore()
- return lexTop
+ lx.emit(itemEOF)
+ return nil
}
- return lx.errorf("Expected a top-level item to end with a new line, "+
- "comment or EOF, but got %q instead.", r)
+ return lx.errorf("expected a top-level item to end with a newline, "+
+ "comment, or EOF, but got %q instead", r)
}
// lexTable lexes the beginning of a table. Namely, it makes sure that
@@ -253,21 +297,22 @@ func lexTableEnd(lx *lexer) stateFn {
func lexArrayTableEnd(lx *lexer) stateFn {
if r := lx.next(); r != arrayTableEnd {
- return lx.errorf("Expected end of table array name delimiter %q, "+
- "but got %q instead.", arrayTableEnd, r)
+ return lx.errorf("expected end of table array name delimiter %q, "+
+ "but got %q instead", arrayTableEnd, r)
}
lx.emit(itemArrayTableEnd)
return lexTopEnd
}
func lexTableNameStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == tableEnd || r == eof:
- return lx.errorf("Unexpected end of table name. (Table names cannot " +
- "be empty.)")
+ return lx.errorf("unexpected end of table name " +
+ "(table names cannot be empty)")
case r == tableSep:
- return lx.errorf("Unexpected table separator. (Table names cannot " +
- "be empty.)")
+ return lx.errorf("unexpected table separator " +
+ "(table names cannot be empty)")
case r == stringStart || r == rawStringStart:
lx.ignore()
lx.push(lexTableNameEnd)
@@ -277,24 +322,22 @@ func lexTableNameStart(lx *lexer) stateFn {
}
}
-// lexTableName lexes the name of a table. It assumes that at least one
+// lexBareTableName lexes the name of a table. It assumes that at least one
// valid character for the table has already been read.
func lexBareTableName(lx *lexer) stateFn {
- switch r := lx.next(); {
- case isBareKeyChar(r):
+ r := lx.next()
+ if isBareKeyChar(r) {
return lexBareTableName
- case r == tableSep || r == tableEnd:
- lx.backup()
- lx.emitTrim(itemText)
- return lexTableNameEnd
- default:
- return lx.errorf("Bare keys cannot contain %q.", r)
}
+ lx.backup()
+ lx.emit(itemText)
+ return lexTableNameEnd
}
// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
switch r := lx.next(); {
case isWhitespace(r):
return lexTableNameEnd
@@ -304,8 +347,8 @@ func lexTableNameEnd(lx *lexer) stateFn {
case r == tableEnd:
return lx.pop()
default:
- return lx.errorf("Expected '.' or ']' to end table name, but got %q "+
- "instead.", r)
+ return lx.errorf("expected '.' or ']' to end table name, "+
+ "but got %q instead", r)
}
}
@@ -315,7 +358,7 @@ func lexKeyStart(lx *lexer) stateFn {
r := lx.peek()
switch {
case r == keySep:
- return lx.errorf("Unexpected key separator %q.", keySep)
+ return lx.errorf("unexpected key separator %q", keySep)
case isWhitespace(r) || isNL(r):
lx.next()
return lexSkip(lx, lexKeyStart)
@@ -338,14 +381,15 @@ func lexBareKey(lx *lexer) stateFn {
case isBareKeyChar(r):
return lexBareKey
case isWhitespace(r):
- lx.emitTrim(itemText)
+ lx.backup()
+ lx.emit(itemText)
return lexKeyEnd
case r == keySep:
lx.backup()
- lx.emitTrim(itemText)
+ lx.emit(itemText)
return lexKeyEnd
default:
- return lx.errorf("Bare keys cannot contain %q.", r)
+ return lx.errorf("bare keys cannot contain %q", r)
}
}
@@ -358,7 +402,7 @@ func lexKeyEnd(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexKeyEnd)
default:
- return lx.errorf("Expected key separator %q, but got %q instead.",
+ return lx.errorf("expected key separator %q, but got %q instead",
keySep, r)
}
}
@@ -367,20 +411,26 @@ func lexKeyEnd(lx *lexer) stateFn {
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the next is popped and returned.
func lexValue(lx *lexer) stateFn {
- // We allow whitespace to precede a value, but NOT new lines.
- // In array syntax, the array states are responsible for ignoring new
- // lines.
+ // We allow whitespace to precede a value, but NOT newlines.
+ // In array syntax, the array states are responsible for ignoring newlines.
r := lx.next()
- if isWhitespace(r) {
+ switch {
+ case isWhitespace(r):
return lexSkip(lx, lexValue)
+ case isDigit(r):
+ lx.backup() // avoid an extra state and use the same as above
+ return lexNumberOrDateStart
}
-
- switch {
- case r == arrayStart:
+ switch r {
+ case arrayStart:
lx.ignore()
lx.emit(itemArray)
return lexArrayValue
- case r == stringStart:
+ case inlineTableStart:
+ lx.ignore()
+ lx.emit(itemInlineTableStart)
+ return lexInlineTableValue
+ case stringStart:
if lx.accept(stringStart) {
if lx.accept(stringStart) {
lx.ignore() // Ignore """
@@ -390,7 +440,7 @@ func lexValue(lx *lexer) stateFn {
}
lx.ignore() // ignore the '"'
return lexString
- case r == rawStringStart:
+ case rawStringStart:
if lx.accept(rawStringStart) {
if lx.accept(rawStringStart) {
lx.ignore() // Ignore """
@@ -400,23 +450,24 @@ func lexValue(lx *lexer) stateFn {
}
lx.ignore() // ignore the "'"
return lexRawString
- case r == 't':
- return lexTrue
- case r == 'f':
- return lexFalse
- case r == '-':
+ case '+', '-':
return lexNumberStart
- case isDigit(r):
- lx.backup() // avoid an extra state and use the same as above
- return lexNumberOrDateStart
- case r == '.': // special error case, be kind to users
- return lx.errorf("Floats must start with a digit, not '.'.")
+ case '.': // special error case, be kind to users
+ return lx.errorf("floats must start with a digit, not '.'")
+ }
+ if unicode.IsLetter(r) {
+ // Be permissive here; lexBool will give a nice error if the
+ // user wrote something like
+ // x = foo
+ // (i.e. not 'true' or 'false' but is something else word-like.)
+ lx.backup()
+ return lexBool
}
- return lx.errorf("Expected value but found %q instead.", r)
+ return lx.errorf("expected value but found %q instead", r)
}
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
-// have already been consumed. All whitespace and new lines are ignored.
+// have already been consumed. All whitespace and newlines are ignored.
func lexArrayValue(lx *lexer) stateFn {
r := lx.next()
switch {
@@ -425,10 +476,11 @@ func lexArrayValue(lx *lexer) stateFn {
case r == commentStart:
lx.push(lexArrayValue)
return lexCommentStart
- case r == arrayValTerm:
- return lx.errorf("Unexpected array value terminator %q.",
- arrayValTerm)
+ case r == comma:
+ return lx.errorf("unexpected comma")
case r == arrayEnd:
+ // NOTE(caleb): The spec isn't clear about whether you can have
+ // a trailing comma or not, so we'll allow it.
return lexArrayEnd
}
@@ -437,8 +489,9 @@ func lexArrayValue(lx *lexer) stateFn {
return lexValue
}
-// lexArrayValueEnd consumes the cruft between values of an array. Namely,
-// it ignores whitespace and expects either a ',' or a ']'.
+// lexArrayValueEnd consumes everything between the end of an array value and
+// the next value (or the end of the array): it ignores whitespace and newlines
+// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
r := lx.next()
switch {
@@ -447,31 +500,88 @@ func lexArrayValueEnd(lx *lexer) stateFn {
case r == commentStart:
lx.push(lexArrayValueEnd)
return lexCommentStart
- case r == arrayValTerm:
+ case r == comma:
lx.ignore()
return lexArrayValue // move on to the next value
case r == arrayEnd:
return lexArrayEnd
}
- return lx.errorf("Expected an array value terminator %q or an array "+
- "terminator %q, but got %q instead.", arrayValTerm, arrayEnd, r)
+ return lx.errorf(
+ "expected a comma or array terminator %q, but got %q instead",
+ arrayEnd, r,
+ )
}
-// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
-// just been consumed.
+// lexArrayEnd finishes the lexing of an array.
+// It assumes that a ']' has just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemArrayEnd)
return lx.pop()
}
+// lexInlineTableValue consumes one key/value pair in an inline table.
+// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
+func lexInlineTableValue(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexInlineTableValue)
+ case isNL(r):
+ return lx.errorf("newlines not allowed within inline tables")
+ case r == commentStart:
+ lx.push(lexInlineTableValue)
+ return lexCommentStart
+ case r == comma:
+ return lx.errorf("unexpected comma")
+ case r == inlineTableEnd:
+ return lexInlineTableEnd
+ }
+ lx.backup()
+ lx.push(lexInlineTableValueEnd)
+ return lexKeyStart
+}
+
+// lexInlineTableValueEnd consumes everything between the end of an inline table
+// key/value pair and the next pair (or the end of the table):
+// it ignores whitespace and expects either a ',' or a '}'.
+func lexInlineTableValueEnd(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexInlineTableValueEnd)
+ case isNL(r):
+ return lx.errorf("newlines not allowed within inline tables")
+ case r == commentStart:
+ lx.push(lexInlineTableValueEnd)
+ return lexCommentStart
+ case r == comma:
+ lx.ignore()
+ return lexInlineTableValue
+ case r == inlineTableEnd:
+ return lexInlineTableEnd
+ }
+ return lx.errorf("expected a comma or an inline table terminator %q, "+
+ "but got %q instead", inlineTableEnd, r)
+}
+
+// lexInlineTableEnd finishes the lexing of an inline table.
+// It assumes that a '}' has just been consumed.
+func lexInlineTableEnd(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemInlineTableEnd)
+ return lx.pop()
+}
+
// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
r := lx.next()
switch {
+ case r == eof:
+ return lx.errorf("unexpected EOF")
case isNL(r):
- return lx.errorf("Strings cannot contain new lines.")
+ return lx.errorf("strings cannot contain newlines")
case r == '\\':
lx.push(lexString)
return lexStringEscape
@@ -488,11 +598,12 @@ func lexString(lx *lexer) stateFn {
// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == '\\':
+ switch lx.next() {
+ case eof:
+ return lx.errorf("unexpected EOF")
+ case '\\':
return lexMultilineStringEscape
- case r == stringEnd:
+ case stringEnd:
if lx.accept(stringEnd) {
if lx.accept(stringEnd) {
lx.backup()
@@ -516,8 +627,10 @@ func lexMultilineString(lx *lexer) stateFn {
func lexRawString(lx *lexer) stateFn {
r := lx.next()
switch {
+ case r == eof:
+ return lx.errorf("unexpected EOF")
case isNL(r):
- return lx.errorf("Strings cannot contain new lines.")
+ return lx.errorf("strings cannot contain newlines")
case r == rawStringEnd:
lx.backup()
lx.emit(itemRawString)
@@ -529,12 +642,13 @@ func lexRawString(lx *lexer) stateFn {
}
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
-// a string. It assumes that the beginning "'" has already been consumed and
+// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == rawStringEnd:
+ switch lx.next() {
+ case eof:
+ return lx.errorf("unexpected EOF")
+ case rawStringEnd:
if lx.accept(rawStringEnd) {
if lx.accept(rawStringEnd) {
lx.backup()
@@ -559,11 +673,10 @@ func lexMultilineStringEscape(lx *lexer) stateFn {
// Handle the special case first:
if isNL(lx.next()) {
return lexMultilineString
- } else {
- lx.backup()
- lx.push(lexMultilineString)
- return lexStringEscape(lx)
}
+ lx.backup()
+ lx.push(lexMultilineString)
+ return lexStringEscape(lx)
}
func lexStringEscape(lx *lexer) stateFn {
@@ -588,10 +701,9 @@ func lexStringEscape(lx *lexer) stateFn {
case 'U':
return lexLongUnicodeEscape
}
- return lx.errorf("Invalid escape character %q. Only the following "+
+ return lx.errorf("invalid escape character %q; only the following "+
"escape characters are allowed: "+
- "\\b, \\t, \\n, \\f, \\r, \\\", \\/, \\\\, "+
- "\\uXXXX and \\UXXXXXXXX.", r)
+ `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
}
func lexShortUnicodeEscape(lx *lexer) stateFn {
@@ -599,8 +711,8 @@ func lexShortUnicodeEscape(lx *lexer) stateFn {
for i := 0; i < 4; i++ {
r = lx.next()
if !isHexadecimal(r) {
- return lx.errorf("Expected four hexadecimal digits after '\\u', "+
- "but got '%s' instead.", lx.current())
+ return lx.errorf(`expected four hexadecimal digits after '\u', `+
+ "but got %q instead", lx.current())
}
}
return lx.pop()
@@ -611,40 +723,43 @@ func lexLongUnicodeEscape(lx *lexer) stateFn {
for i := 0; i < 8; i++ {
r = lx.next()
if !isHexadecimal(r) {
- return lx.errorf("Expected eight hexadecimal digits after '\\U', "+
- "but got '%s' instead.", lx.current())
+ return lx.errorf(`expected eight hexadecimal digits after '\U', `+
+ "but got %q instead", lx.current())
}
}
return lx.pop()
}
-// lexNumberOrDateStart consumes either a (positive) integer, float or
-// datetime. It assumes that NO negative sign has been consumed.
+// lexNumberOrDateStart consumes either an integer, a float, or datetime.
func lexNumberOrDateStart(lx *lexer) stateFn {
r := lx.next()
- if !isDigit(r) {
- if r == '.' {
- return lx.errorf("Floats must start with a digit, not '.'.")
- } else {
- return lx.errorf("Expected a digit but got %q.", r)
- }
+ if isDigit(r) {
+ return lexNumberOrDate
}
- return lexNumberOrDate
+ switch r {
+ case '_':
+ return lexNumber
+ case 'e', 'E':
+ return lexFloat
+ case '.':
+ return lx.errorf("floats must start with a digit, not '.'")
+ }
+ return lx.errorf("expected a digit but got %q", r)
}
-// lexNumberOrDate consumes either a (positive) integer, float or datetime.
+// lexNumberOrDate consumes either an integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
r := lx.next()
- switch {
- case r == '-':
- if lx.pos-lx.start != 5 {
- return lx.errorf("All ISO8601 dates must be in full Zulu form.")
- }
- return lexDateAfterYear
- case isDigit(r):
+ if isDigit(r) {
return lexNumberOrDate
- case r == '.':
- return lexFloatStart
+ }
+ switch r {
+ case '-':
+ return lexDatetime
+ case '_':
+ return lexNumber
+ case '.', 'e', 'E':
+ return lexFloat
}
lx.backup()
@@ -652,46 +767,34 @@ func lexNumberOrDate(lx *lexer) stateFn {
return lx.pop()
}
-// lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
-// It assumes that "YYYY-" has already been consumed.
-func lexDateAfterYear(lx *lexer) stateFn {
- formats := []rune{
- // digits are '0'.
- // everything else is direct equality.
- '0', '0', '-', '0', '0',
- 'T',
- '0', '0', ':', '0', '0', ':', '0', '0',
- 'Z',
+// lexDatetime consumes a Datetime, to a first approximation.
+// The parser validates that it matches one of the accepted formats.
+func lexDatetime(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexDatetime
}
- for _, f := range formats {
- r := lx.next()
- if f == '0' {
- if !isDigit(r) {
- return lx.errorf("Expected digit in ISO8601 datetime, "+
- "but found %q instead.", r)
- }
- } else if f != r {
- return lx.errorf("Expected %q in ISO8601 datetime, "+
- "but found %q instead.", f, r)
- }
+ switch r {
+ case '-', 'T', ':', '.', 'Z', '+':
+ return lexDatetime
}
+
+ lx.backup()
lx.emit(itemDatetime)
return lx.pop()
}
-// lexNumberStart consumes either an integer or a float. It assumes that
-// a negative sign has already been read, but that *no* digits have been
-// consumed. lexNumberStart will move to the appropriate integer or float
-// states.
+// lexNumberStart consumes either an integer or a float. It assumes that a sign
+// has already been read, but that *no* digits have been consumed.
+// lexNumberStart will move to the appropriate integer or float states.
func lexNumberStart(lx *lexer) stateFn {
- // we MUST see a digit. Even floats have to start with a digit.
+ // We MUST see a digit. Even floats have to start with a digit.
r := lx.next()
if !isDigit(r) {
if r == '.' {
- return lx.errorf("Floats must start with a digit, not '.'.")
- } else {
- return lx.errorf("Expected a digit but got %q.", r)
+ return lx.errorf("floats must start with a digit, not '.'")
}
+ return lx.errorf("expected a digit but got %q", r)
}
return lexNumber
}
@@ -699,11 +802,14 @@ func lexNumberStart(lx *lexer) stateFn {
// lexNumber consumes an integer or a float after seeing the first digit.
func lexNumber(lx *lexer) stateFn {
r := lx.next()
- switch {
- case isDigit(r):
+ if isDigit(r) {
return lexNumber
- case r == '.':
- return lexFloatStart
+ }
+ switch r {
+ case '_':
+ return lexNumber
+ case '.', 'e', 'E':
+ return lexFloat
}
lx.backup()
@@ -711,60 +817,42 @@ func lexNumber(lx *lexer) stateFn {
return lx.pop()
}
-// lexFloatStart starts the consumption of digits of a float after a '.'.
-// Namely, at least one digit is required.
-func lexFloatStart(lx *lexer) stateFn {
- r := lx.next()
- if !isDigit(r) {
- return lx.errorf("Floats must have a digit after the '.', but got "+
- "%q instead.", r)
- }
- return lexFloat
-}
-
-// lexFloat consumes the digits of a float after a '.'.
-// Assumes that one digit has been consumed after a '.' already.
+// lexFloat consumes the elements of a float. It allows any sequence of
+// float-like characters, so floats emitted by the lexer are only a first
+// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexFloat
}
+ switch r {
+ case '_', '.', '-', '+', 'e', 'E':
+ return lexFloat
+ }
lx.backup()
lx.emit(itemFloat)
return lx.pop()
}
-// lexConst consumes the s[1:] in s. It assumes that s[0] has already been
-// consumed.
-func lexConst(lx *lexer, s string) stateFn {
- for i := range s[1:] {
- if r := lx.next(); r != rune(s[i+1]) {
- return lx.errorf("Expected %q, but found %q instead.", s[:i+1],
- s[:i]+string(r))
+// lexBool consumes a bool string: 'true' or 'false.
+func lexBool(lx *lexer) stateFn {
+ var rs []rune
+ for {
+ r := lx.next()
+ if !unicode.IsLetter(r) {
+ lx.backup()
+ break
}
+ rs = append(rs, r)
}
- return nil
-}
-
-// lexTrue consumes the "rue" in "true". It assumes that 't' has already
-// been consumed.
-func lexTrue(lx *lexer) stateFn {
- if fn := lexConst(lx, "true"); fn != nil {
- return fn
- }
- lx.emit(itemBool)
- return lx.pop()
-}
-
-// lexFalse consumes the "alse" in "false". It assumes that 'f' has already
-// been consumed.
-func lexFalse(lx *lexer) stateFn {
- if fn := lexConst(lx, "false"); fn != nil {
- return fn
+ s := string(rs)
+ switch s {
+ case "true", "false":
+ lx.emit(itemBool)
+ return lx.pop()
}
- lx.emit(itemBool)
- return lx.pop()
+ return lx.errorf("expected value but found %q instead", s)
}
// lexCommentStart begins the lexing of a comment. It will emit
@@ -776,7 +864,7 @@ func lexCommentStart(lx *lexer) stateFn {
}
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
-// It will consume *up to* the first new line character, and pass control
+// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
r := lx.peek()
@@ -834,13 +922,7 @@ func (itype itemType) String() string {
return "EOF"
case itemText:
return "Text"
- case itemString:
- return "String"
- case itemRawString:
- return "String"
- case itemMultilineString:
- return "String"
- case itemRawMultilineString:
+ case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
return "String"
case itemBool:
return "Bool"