diff options
Diffstat (limited to 'pkg/util/camelcase/camelcase.go')
-rw-r--r-- | pkg/util/camelcase/camelcase.go | 90 |
1 files changed, 90 insertions, 0 deletions
diff --git a/pkg/util/camelcase/camelcase.go b/pkg/util/camelcase/camelcase.go new file mode 100644 index 000000000..02160c9a4 --- /dev/null +++ b/pkg/util/camelcase/camelcase.go @@ -0,0 +1,90 @@ +// Package camelcase is a micro package to split the words of a camelcase type +// string into a slice of words. +package camelcase + +import ( + "unicode" + "unicode/utf8" +) + +// Split splits the camelcase word and returns a list of words. It also +// supports digits. Both lower camel case and upper camel case are supported. +// For more info please check: http://en.wikipedia.org/wiki/CamelCase +// +// Examples +// +// "" => [""] +// "lowercase" => ["lowercase"] +// "Class" => ["Class"] +// "MyClass" => ["My", "Class"] +// "MyC" => ["My", "C"] +// "HTML" => ["HTML"] +// "PDFLoader" => ["PDF", "Loader"] +// "AString" => ["A", "String"] +// "SimpleXMLParser" => ["Simple", "XML", "Parser"] +// "vimRPCPlugin" => ["vim", "RPC", "Plugin"] +// "GL11Version" => ["GL", "11", "Version"] +// "99Bottles" => ["99", "Bottles"] +// "May5" => ["May", "5"] +// "BFG9000" => ["BFG", "9000"] +// "BöseÜberraschung" => ["Böse", "Überraschung"] +// "Two spaces" => ["Two", " ", "spaces"] +// "BadUTF8\xe2\xe2\xa1" => ["BadUTF8\xe2\xe2\xa1"] +// +// Splitting rules +// +// 1) If string is not valid UTF-8, return it without splitting as +// single item array. +// 2) Assign all unicode characters into one of 4 sets: lower case +// letters, upper case letters, numbers, and all other characters. +// 3) Iterate through characters of string, introducing splits +// between adjacent characters that belong to different sets. +// 4) Iterate through array of split strings, and if a given string +// is upper case: +// if subsequent string is lower case: +// move last character of upper case string to beginning of +// lower case string +func Split(src string) (entries []string) { + // don't split invalid utf8 + if !utf8.ValidString(src) { + return []string{src} + } + entries = []string{} + var runes [][]rune + lastClass := 0 + class := 0 + // split into fields based on class of unicode character + for _, r := range src { + switch true { + case unicode.IsLower(r): + class = 1 + case unicode.IsUpper(r): + class = 2 + case unicode.IsDigit(r): + class = 3 + default: + class = 4 + } + if class == lastClass { + runes[len(runes)-1] = append(runes[len(runes)-1], r) + } else { + runes = append(runes, []rune{r}) + } + lastClass = class + } + // handle upper case -> lower case sequences, e.g. + // "PDFL", "oader" -> "PDF", "Loader" + for i := 0; i < len(runes)-1; i++ { + if unicode.IsUpper(runes[i][0]) && unicode.IsLower(runes[i+1][0]) { + runes[i+1] = append([]rune{runes[i][len(runes[i])-1]}, runes[i+1]...) + runes[i] = runes[i][:len(runes[i])-1] + } + } + // construct []string from results + for _, s := range runes { + if len(s) > 0 { + entries = append(entries, string(s)) + } + } + return +} |