1 Star 0 Fork 2

who7708 / etcd

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
text_parser.go 16.04 KB
一键复制 编辑 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// http://code.google.com/p/goprotobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
// Functions for parsing the Text protocol buffer format.
// TODO: message sets.
import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"unicode/utf8"
)
type ParseError struct {
Message string
Line int // 1-based line number
Offset int // 0-based byte offset from start of input
}
func (p *ParseError) Error() string {
if p.Line == 1 {
// show offset only for first line
return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
return fmt.Sprintf("line %d: %v", p.Line, p.Message)
}
type token struct {
value string
err *ParseError
line int // line number
offset int // byte number from start of input, not start of line
unquoted string // the unquoted version of value, if it was a quoted string
}
func (t *token) String() string {
if t.err == nil {
return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
}
return fmt.Sprintf("parse error: %v", t.err)
}
type textParser struct {
s string // remaining input
done bool // whether the parsing is finished (success or error)
backed bool // whether back() was called
offset, line int
cur token
}
func newTextParser(s string) *textParser {
p := new(textParser)
p.s = s
p.line = 1
p.cur.line = 1
return p
}
func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
p.cur.err = pe
p.done = true
return pe
}
// Numbers and identifiers are matched by [-+._A-Za-z0-9]
func isIdentOrNumberChar(c byte) bool {
switch {
case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
return true
case '0' <= c && c <= '9':
return true
}
switch c {
case '-', '+', '.', '_':
return true
}
return false
}
func isWhitespace(c byte) bool {
switch c {
case ' ', '\t', '\n', '\r':
return true
}
return false
}
func (p *textParser) skipWhitespace() {
i := 0
for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
if p.s[i] == '#' {
// comment; skip to end of line or input
for i < len(p.s) && p.s[i] != '\n' {
i++
}
if i == len(p.s) {
break
}
}
if p.s[i] == '\n' {
p.line++
}
i++
}
p.offset += i
p.s = p.s[i:len(p.s)]
if len(p.s) == 0 {
p.done = true
}
}
func (p *textParser) advance() {
// Skip whitespace
p.skipWhitespace()
if p.done {
return
}
// Start of non-whitespace
p.cur.err = nil
p.cur.offset, p.cur.line = p.offset, p.line
p.cur.unquoted = ""
switch p.s[0] {
case '<', '>', '{', '}', ':', '[', ']', ';', ',':
// Single symbol
p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
case '"', '\'':
// Quoted string
i := 1
for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
if p.s[i] == '\\' && i+1 < len(p.s) {
// skip escaped char
i++
}
i++
}
if i >= len(p.s) || p.s[i] != p.s[0] {
p.errorf("unmatched quote")
return
}
unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
if err != nil {
p.errorf("invalid quoted string %v", p.s[0:i+1])
return
}
p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
p.cur.unquoted = unq
default:
i := 0
for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
i++
}
if i == 0 {
p.errorf("unexpected byte %#x", p.s[0])
return
}
p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
}
p.offset += len(p.cur.value)
}
var (
errBadUTF8 = errors.New("proto: bad UTF-8")
errBadHex = errors.New("proto: bad hexadecimal")
)
func unquoteC(s string, quote rune) (string, error) {
// This is based on C++'s tokenizer.cc.
// Despite its name, this is *not* parsing C syntax.
// For instance, "\0" is an invalid quoted string.
// Avoid allocation in trivial cases.
simple := true
for _, r := range s {
if r == '\\' || r == quote {
simple = false
break
}
}
if simple {
return s, nil
}
buf := make([]byte, 0, 3*len(s)/2)
for len(s) > 0 {
r, n := utf8.DecodeRuneInString(s)
if r == utf8.RuneError && n == 1 {
return "", errBadUTF8
}
s = s[n:]
if r != '\\' {
if r < utf8.RuneSelf {
buf = append(buf, byte(r))
} else {
buf = append(buf, string(r)...)
}
continue
}
ch, tail, err := unescape(s)
if err != nil {
return "", err
}
buf = append(buf, ch...)
s = tail
}
return string(buf), nil
}
func unescape(s string) (ch string, tail string, err error) {
r, n := utf8.DecodeRuneInString(s)
if r == utf8.RuneError && n == 1 {
return "", "", errBadUTF8
}
s = s[n:]
switch r {
case 'a':
return "\a", s, nil
case 'b':
return "\b", s, nil
case 'f':
return "\f", s, nil
case 'n':
return "\n", s, nil
case 'r':
return "\r", s, nil
case 't':
return "\t", s, nil
case 'v':
return "\v", s, nil
case '?':
return "?", s, nil // trigraph workaround
case '\'', '"', '\\':
return string(r), s, nil
case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
if len(s) < 2 {
return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
}
base := 8
ss := s[:2]
s = s[2:]
if r == 'x' || r == 'X' {
base = 16
} else {
ss = string(r) + ss
}
i, err := strconv.ParseUint(ss, base, 8)
if err != nil {
return "", "", err
}
return string([]byte{byte(i)}), s, nil
case 'u', 'U':
n := 4
if r == 'U' {
n = 8
}
if len(s) < n {
return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
}
bs := make([]byte, n/2)
for i := 0; i < n; i += 2 {
a, ok1 := unhex(s[i])
b, ok2 := unhex(s[i+1])
if !ok1 || !ok2 {
return "", "", errBadHex
}
bs[i/2] = a<<4 | b
}
s = s[n:]
return string(bs), s, nil
}
return "", "", fmt.Errorf(`unknown escape \%c`, r)
}
// Adapted from src/pkg/strconv/quote.go.
func unhex(b byte) (v byte, ok bool) {
switch {
case '0' <= b && b <= '9':
return b - '0', true
case 'a' <= b && b <= 'f':
return b - 'a' + 10, true
case 'A' <= b && b <= 'F':
return b - 'A' + 10, true
}
return 0, false
}
// Back off the parser by one token. Can only be done between calls to next().
// It makes the next advance() a no-op.
func (p *textParser) back() { p.backed = true }
// Advances the parser and returns the new current token.
func (p *textParser) next() *token {
if p.backed || p.done {
p.backed = false
return &p.cur
}
p.advance()
if p.done {
p.cur.value = ""
} else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
// Look for multiple quoted strings separated by whitespace,
// and concatenate them.
cat := p.cur
for {
p.skipWhitespace()
if p.done || p.s[0] != '"' {
break
}
p.advance()
if p.cur.err != nil {
return &p.cur
}
cat.value += " " + p.cur.value
cat.unquoted += p.cur.unquoted
}
p.done = false // parser may have seen EOF, but we want to return cat
p.cur = cat
}
return &p.cur
}
// Return an error indicating which required field was not set.
func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
st := sv.Type()
sprops := GetProperties(st)
for i := 0; i < st.NumField(); i++ {
if !isNil(sv.Field(i)) {
continue
}
props := sprops.Prop[i]
if props.Required {
return p.errorf("message %v missing required field %q", st, props.OrigName)
}
}
return p.errorf("message %v missing required field", st) // should not happen
}
// Returns the index in the struct for the named field, as well as the parsed tag properties.
func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
sprops := GetProperties(st)
i, ok := sprops.decoderOrigNames[name]
if ok {
return i, sprops.Prop[i], true
}
return -1, nil, false
}
// Consume a ':' from the input stream (if the next token is a colon),
// returning an error if a colon is needed but not present.
func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
tok := p.next()
if tok.err != nil {
return tok.err
}
if tok.value != ":" {
// Colon is optional when the field is a group or message.
needColon := true
switch props.Wire {
case "group":
needColon = false
case "bytes":
// A "bytes" field is either a message, a string, or a repeated field;
// those three become *T, *string and []T respectively, so we can check for
// this field being a pointer to a non-string.
if typ.Kind() == reflect.Ptr {
// *T or *string
if typ.Elem().Kind() == reflect.String {
break
}
} else if typ.Kind() == reflect.Slice {
// []T or []*T
if typ.Elem().Kind() != reflect.Ptr {
break
}
}
needColon = false
}
if needColon {
return p.errorf("expected ':', found %q", tok.value)
}
p.back()
}
return nil
}
func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
st := sv.Type()
reqCount := GetProperties(st).reqCount
// A struct is a sequence of "name: value", terminated by one of
// '>' or '}', or the end of the input. A name may also be
// "[extension]".
for {
tok := p.next()
if tok.err != nil {
return tok.err
}
if tok.value == terminator {
break
}
if tok.value == "[" {
// Looks like an extension.
//
// TODO: Check whether we need to handle
// namespace rooted names (e.g. ".something.Foo").
tok = p.next()
if tok.err != nil {
return tok.err
}
var desc *ExtensionDesc
// This could be faster, but it's functional.
// TODO: Do something smarter than a linear scan.
for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
if d.Name == tok.value {
desc = d
break
}
}
if desc == nil {
return p.errorf("unrecognized extension %q", tok.value)
}
// Check the extension terminator.
tok = p.next()
if tok.err != nil {
return tok.err
}
if tok.value != "]" {
return p.errorf("unrecognized extension terminator %q", tok.value)
}
props := &Properties{}
props.Parse(desc.Tag)
typ := reflect.TypeOf(desc.ExtensionType)
if err := p.checkForColon(props, typ); err != nil {
return err
}
rep := desc.repeated()
// Read the extension structure, and set it in
// the value we're constructing.
var ext reflect.Value
if !rep {
ext = reflect.New(typ).Elem()
} else {
ext = reflect.New(typ.Elem()).Elem()
}
if err := p.readAny(ext, props); err != nil {
return err
}
ep := sv.Addr().Interface().(extendableProto)
if !rep {
SetExtension(ep, desc, ext.Interface())
} else {
old, err := GetExtension(ep, desc)
var sl reflect.Value
if err == nil {
sl = reflect.ValueOf(old) // existing slice
} else {
sl = reflect.MakeSlice(typ, 0, 1)
}
sl = reflect.Append(sl, ext)
SetExtension(ep, desc, sl.Interface())
}
} else {
// This is a normal, non-extension field.
fi, props, ok := structFieldByName(st, tok.value)
if !ok {
return p.errorf("unknown field name %q in %v", tok.value, st)
}
dst := sv.Field(fi)
isDstNil := isNil(dst)
// Check that it's not already set if it's not a repeated field.
if !props.Repeated && !isDstNil {
return p.errorf("non-repeated field %q was repeated", tok.value)
}
if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
return err
}
// Parse into the field.
if err := p.readAny(dst, props); err != nil {
return err
}
if props.Required {
reqCount--
}
}
// For backward compatibility, permit a semicolon or comma after a field.
tok = p.next()
if tok.err != nil {
return tok.err
}
if tok.value != ";" && tok.value != "," {
p.back()
}
}
if reqCount > 0 {
return p.missingRequiredFieldError(sv)
}
return nil
}
func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
tok := p.next()
if tok.err != nil {
return tok.err
}
if tok.value == "" {
return p.errorf("unexpected EOF")
}
switch fv := v; fv.Kind() {
case reflect.Slice:
at := v.Type()
if at.Elem().Kind() == reflect.Uint8 {
// Special case for []byte
if tok.value[0] != '"' && tok.value[0] != '\'' {
// Deliberately written out here, as the error after
// this switch statement would write "invalid []byte: ...",
// which is not as user-friendly.
return p.errorf("invalid string: %v", tok.value)
}
bytes := []byte(tok.unquoted)
fv.Set(reflect.ValueOf(bytes))
return nil
}
// Repeated field. May already exist.
flen := fv.Len()
if flen == fv.Cap() {
nav := reflect.MakeSlice(at, flen, 2*flen+1)
reflect.Copy(nav, fv)
fv.Set(nav)
}
fv.SetLen(flen + 1)
// Read one.
p.back()
return p.readAny(fv.Index(flen), props)
case reflect.Bool:
// Either "true", "false", 1 or 0.
switch tok.value {
case "true", "1":
fv.SetBool(true)
return nil
case "false", "0":
fv.SetBool(false)
return nil
}
case reflect.Float32, reflect.Float64:
v := tok.value
// Ignore 'f' for compatibility with output generated by C++, but don't
// remove 'f' when the value is "-inf" or "inf".
if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
v = v[:len(v)-1]
}
if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
fv.SetFloat(f)
return nil
}
case reflect.Int32:
if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
fv.SetInt(x)
return nil
}
if len(props.Enum) == 0 {
break
}
m, ok := enumValueMaps[props.Enum]
if !ok {
break
}
x, ok := m[tok.value]
if !ok {
break
}
fv.SetInt(int64(x))
return nil
case reflect.Int64:
if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
fv.SetInt(x)
return nil
}
case reflect.Ptr:
// A basic field (indirected through pointer), or a repeated message/group
p.back()
fv.Set(reflect.New(fv.Type().Elem()))
return p.readAny(fv.Elem(), props)
case reflect.String:
if tok.value[0] == '"' || tok.value[0] == '\'' {
fv.SetString(tok.unquoted)
return nil
}
case reflect.Struct:
var terminator string
switch tok.value {
case "{":
terminator = "}"
case "<":
terminator = ">"
default:
return p.errorf("expected '{' or '<', found %q", tok.value)
}
return p.readStruct(fv, terminator)
case reflect.Uint32:
if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
fv.SetUint(uint64(x))
return nil
}
case reflect.Uint64:
if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
fv.SetUint(x)
return nil
}
}
return p.errorf("invalid %v: %v", v.Type(), tok.value)
}
// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
// before starting to unmarshal, so any existing data in pb is always removed.
func UnmarshalText(s string, pb Message) error {
pb.Reset()
v := reflect.ValueOf(pb)
if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
return pe
}
return nil
}
1
https://gitee.com/who7708/etcd.git
git@gitee.com:who7708/etcd.git
who7708
etcd
etcd
v0.4.8

搜索帮助