// Copyright 2018 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package textimport ()// Decoder is a token-based textproto decoder.typeDecoderstruct {// lastCall is last method called, either readCall or peekCall. // Initial value is readCall. lastCall call// lastToken contains the last read token. lastToken Token// lastErr contains the last read error. lastErr error// openStack is a stack containing the byte characters for MessageOpen and // ListOpen kinds. The top of stack represents the message or the list that // the current token is nested in. An empty stack means the current token is // at the top level message. The characters '{' and '<' both represent the // MessageOpen kind. openStack []byte// orig is used in reporting line and column. orig []byte// in contains the unconsumed input. in []byte}// NewDecoder returns a Decoder to read the given []byte.func ( []byte) *Decoder {return &Decoder{orig: , in: }}// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.varErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)// call specifies which Decoder method was invoked.type call uint8const ( readCall call = iota peekCall)// Peek looks ahead and returns the next token and error without advancing a read.func ( *Decoder) () (Token, error) {deferfunc() { .lastCall = peekCall }()if .lastCall == readCall { .lastToken, .lastErr = .Read() }return .lastToken, .lastErr}// Read returns the next token.// It will return an error if there is no valid token.func ( *Decoder) () (Token, error) {deferfunc() { .lastCall = readCall }()if .lastCall == peekCall {return .lastToken, .lastErr } , := .parseNext(.lastToken.kind)if != nil {returnToken{}, }switch .kind {casecomma, semicolon: , = .parseNext(.kind)if != nil {returnToken{}, } } .lastToken = return , nil}const ( mismatchedFmt = "mismatched close character %q" unexpectedFmt = "unexpected character %q")// parseNext 
parses the next Token based on given last kind.func ( *Decoder) ( Kind) (Token, error) {// Trim leading spaces. .consume(0) := falseiflen(.in) == 0 { = true }switch {caseEOF:return .consumeToken(EOF, 0, 0), nilcasebof:// Start of top level message. Next token can be EOF or Name.if {return .consumeToken(EOF, 0, 0), nil }return .parseFieldName()caseName:// Next token can be MessageOpen, ListOpen or Scalar.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case'{', '<': .pushOpenStack()return .consumeToken(MessageOpen, 1, 0), nilcase'[': .pushOpenStack()return .consumeToken(ListOpen, 1, 0), nildefault:return .parseScalar() }caseScalar: , := .currentOpenKind()switch {casebof:// Top level message. // Next token can be EOF, comma, semicolon or Name.if {return .consumeToken(EOF, 0, 0), nil }switch .in[0] {case',':return .consumeToken(comma, 1, 0), nilcase';':return .consumeToken(semicolon, 1, 0), nildefault:return .parseFieldName() }caseMessageOpen:// Next token can be MessageClose, comma, semicolon or Name.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case : .popOpenStack()return .consumeToken(MessageClose, 1, 0), nilcaseotherCloseChar[]:returnToken{}, .newSyntaxError(mismatchedFmt, )case',':return .consumeToken(comma, 1, 0), nilcase';':return .consumeToken(semicolon, 1, 0), nildefault:return .parseFieldName() }caseListOpen:// Next token can be ListClose or comma.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case']': .popOpenStack()return .consumeToken(ListClose, 1, 0), nilcase',':return .consumeToken(comma, 1, 0), nildefault:returnToken{}, .newSyntaxError(unexpectedFmt, ) } }caseMessageOpen:// Next token can be MessageClose or Name.if {returnToken{}, ErrUnexpectedEOF } , := .currentOpenKind()switch := .in[0]; {case : .popOpenStack()return .consumeToken(MessageClose, 1, 0), nilcaseotherCloseChar[]:returnToken{}, .newSyntaxError(mismatchedFmt, )default:return .parseFieldName() }caseMessageClose: , := .currentOpenKind()switch {casebof:// Top level 
message. // Next token can be EOF, comma, semicolon or Name.if {return .consumeToken(EOF, 0, 0), nil }switch := .in[0]; {case',':return .consumeToken(comma, 1, 0), nilcase';':return .consumeToken(semicolon, 1, 0), nildefault:return .parseFieldName() }caseMessageOpen:// Next token can be MessageClose, comma, semicolon or Name.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case : .popOpenStack()return .consumeToken(MessageClose, 1, 0), nilcaseotherCloseChar[]:returnToken{}, .newSyntaxError(mismatchedFmt, )case',':return .consumeToken(comma, 1, 0), nilcase';':return .consumeToken(semicolon, 1, 0), nildefault:return .parseFieldName() }caseListOpen:// Next token can be ListClose or commaif {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case : .popOpenStack()return .consumeToken(ListClose, 1, 0), nilcase',':return .consumeToken(comma, 1, 0), nildefault:returnToken{}, .newSyntaxError(unexpectedFmt, ) } }caseListOpen:// Next token can be ListClose, MessageStart or Scalar.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case']': .popOpenStack()return .consumeToken(ListClose, 1, 0), nilcase'{', '<': .pushOpenStack()return .consumeToken(MessageOpen, 1, 0), nildefault:return .parseScalar() }caseListClose: , := .currentOpenKind()switch {casebof:// Top level message. // Next token can be EOF, comma, semicolon or Name.if {return .consumeToken(EOF, 0, 0), nil }switch := .in[0]; {case',':return .consumeToken(comma, 1, 0), nilcase';':return .consumeToken(semicolon, 1, 0), nildefault:return .parseFieldName() }caseMessageOpen:// Next token can be MessageClose, comma, semicolon or Name.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case : .popOpenStack()return .consumeToken(MessageClose, 1, 0), nilcaseotherCloseChar[]:returnToken{}, .newSyntaxError(mismatchedFmt, )case',':return .consumeToken(comma, 1, 0), nilcase';':return .consumeToken(semicolon, 1, 0), nildefault:return .parseFieldName() }default:// It is not possible to have this case. 
Let it panic below. }casecomma, semicolon: , := .currentOpenKind()switch {casebof:// Top level message. Next token can be EOF or Name.if {return .consumeToken(EOF, 0, 0), nil }return .parseFieldName()caseMessageOpen:// Next token can be MessageClose or Name.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case : .popOpenStack()return .consumeToken(MessageClose, 1, 0), nilcaseotherCloseChar[]:returnToken{}, .newSyntaxError(mismatchedFmt, )default:return .parseFieldName() }caseListOpen:if == semicolon {// It is not be possible to have this case as logic here // should not have produced a semicolon Token when inside a // list. Let it panic below.break }// Next token can be MessageOpen or Scalar.if {returnToken{}, ErrUnexpectedEOF }switch := .in[0]; {case'{', '<': .pushOpenStack()return .consumeToken(MessageOpen, 1, 0), nildefault:return .parseScalar() } } } , := .Position(len(.orig) - len(.in))panic(fmt.Sprintf("Decoder.parseNext: bug at handling line %d:%d with lastKind=%v", , , ))}var otherCloseChar = map[byte]byte{'}': '>','>': '}',}// currentOpenKind indicates whether current position is inside a message, list// or top-level message by returning MessageOpen, ListOpen or bof respectively.// If the returned kind is either a MessageOpen or ListOpen, it also returns the// corresponding closing character.func ( *Decoder) () (Kind, byte) {iflen(.openStack) == 0 {returnbof, 0 } := .openStack[len(.openStack)-1]switch {case'{':returnMessageOpen, '}'case'<':returnMessageOpen, '>'case'[':returnListOpen, ']' }panic(fmt.Sprintf("Decoder: openStack contains invalid byte %c", ))}func ( *Decoder) ( byte) { .openStack = append(.openStack, )}func ( *Decoder) () { .openStack = .openStack[:len(.openStack)-1]}// parseFieldName parses field name and separator.func ( *Decoder) () ( Token, error) {deferfunc() {if == nil && .tryConsumeChar(':') { .attrs |= hasSeparator } }()// Extension or Any type URL.if .in[0] == '[' {return .parseTypeName() }// Identifier.if := parseIdent(.in, 
false); > 0 {return .consumeToken(Name, , uint8(IdentName)), nil }// Field number. Identify if input is a valid number that is not negative // and is decimal integer within 32-bit range.if := parseNumber(.in); .size > 0 { := .string(.in)if !.neg && .kind == numDec {if , := strconv.ParseInt(, 10, 32); == nil {return .consumeToken(Name, .size, uint8(FieldNumber)), nil } }returnToken{}, .newSyntaxError("invalid field number: %s", ) }returnToken{}, .newSyntaxError("invalid field name: %s", errId(.in))}// parseTypeName parses an Any type URL or an extension field name. The name is// enclosed in [ and ] characters. We allow almost arbitrary type URL prefixes,// closely following the text-format spec [1,2]. We implement "ExtensionName |// AnyName" as follows (with some exceptions for backwards compatibility)://// char = [-_a-zA-Z0-9]// url_char = char | [.~!$&'()*+,;=] | "%", hex, hex//// Ident = char, { char }// TypeName = Ident, { ".", Ident } ;// UrlPrefix = url_char, { url_char | "/" } ;// ExtensionName = "[", TypeName, "]" ;// AnyName = "[", UrlPrefix, "/", TypeName, "]" ;//// Additionally, we allow arbitrary whitespace and comments between [ and ].//// [1] https://protobuf.dev/reference/protobuf/textformat-spec/#characters// [2] https://protobuf.dev/reference/protobuf/textformat-spec/#field-namesfunc ( *Decoder) () (Token, error) {// Use alias s to advance first in order to use d.in for error handling. // Caller already checks for [ as first character (d.in[0] == '['). := consume(.in[1:], 0)iflen() == 0 {returnToken{}, ErrUnexpectedEOF }// Collect everything between [ and ] in name.var []bytevarboolforlen() > 0 && ! 
{switch {case [0] == ']': = [1:] = truecase [0] == '/' || isTypeNameChar([0]) || isUrlExtraChar([0]): = append(, [0]) = consume([1:], 0)// URL percent-encoded charscase [0] == '%':iflen() < 3 || !isHexChar([1]) || !isHexChar([2]) {returnToken{}, .parseTypeNameError(, 3) } = append(, [0], [1], [2]) = consume([3:], 0)default:returnToken{}, .parseTypeNameError(, 1) } }if ! {returnToken{}, ErrUnexpectedEOF }// Split collected name on last '/' into urlPrefix and typeName (if '/' is // present). := if := bytes.LastIndexByte(, '/'); != -1 { := [:] = [+1:]// urlPrefix may be empty (for backwards compatibility). // If non-empty, it must not start with '/'.iflen() > 0 && [0] == '/' {returnToken{}, .parseTypeNameError(, 0) } }// typeName must not be empty (note: "" splits to [""]) and all identifier // parts must not be empty.for , := rangebytes.Split(, []byte{'.'}) {iflen() == 0 {returnToken{}, .parseTypeNameError(, 0) } }// typeName must not contain any percent-encoded or special URL chars.for , := range {if == '%' || ( != '.' && isUrlExtraChar()) {returnToken{}, .parseTypeNameError(, 0) } } := len(.orig) - len(.in) := len(.orig) - len() .in = .consume(0)returnToken{kind: Name,attrs: uint8(TypeName),pos: ,raw: .orig[:],str: string(), }, nil}func ( *Decoder) ( []byte, int) error {return .newSyntaxError("invalid type URL/extension field name: %s", .in[:len(.in)-len()+min(, len())], )}func isHexChar( byte) bool {return ('0' <= && <= '9') || ('a' <= && <= 'f') || ('A' <= && <= 'F')}func isTypeNameChar( byte) bool {return == '-' || == '_' || ('0' <= && <= '9') || ('a' <= && <= 'z') || ('A' <= && <= 'Z')}// isUrlExtraChar complements isTypeNameChar with extra characters that we allow// in URLs but not in type names. 
Note that '/' is not included so that it can// be treated specially.func isUrlExtraChar( byte) bool {switch {case'.', '~', '!', '$', '&', '(', ')', '*', '+', ',', ';', '=':returntruedefault:returnfalse }}// parseIdent parses an unquoted proto identifier and returns size.// If allowNeg is true, it allows '-' to be the first character in the// identifier. This is used when parsing literal values like -infinity, etc.// Regular expression matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`func parseIdent( []byte, bool) int {varint := iflen() == 0 {return0 }if && [0] == '-' { = [1:] ++iflen() == 0 {return0 } }switch {case [0] == '_','a' <= [0] && [0] <= 'z','A' <= [0] && [0] <= 'Z': = [1:] ++default:return0 }forlen() > 0 && ([0] == '_' ||'a' <= [0] && [0] <= 'z' ||'A' <= [0] && [0] <= 'Z' ||'0' <= [0] && [0] <= '9') { = [1:] ++ }iflen() > 0 && !isDelim([0]) {return0 }return}// parseScalar parses for a string, literal or number value.func ( *Decoder) () (Token, error) {if .in[0] == '"' || .in[0] == '\'' {return .parseStringValue() }if , := .parseLiteralValue(); {return , nil }if , := .parseNumberValue(); {return , nil }returnToken{}, .newSyntaxError("invalid scalar value: %s", errId(.in))}// parseLiteralValue parses a literal value. A literal value is used for// bools, special floats and enums. This function simply identifies that the// field value is a literal.func ( *Decoder) () (Token, bool) { := parseIdent(.in, true)if == 0 {returnToken{}, false }return .consumeToken(Scalar, , literalValue), true}// consumeToken constructs a Token for given Kind from d.in and consumes given// size-length from it.func ( *Decoder) ( Kind, int, uint8) Token {// Important to compute raw and pos before consuming. := Token{kind: ,attrs: ,pos: len(.orig) - len(.in),raw: .in[:], } .consume()return}// newSyntaxError returns a syntax error with line and column information for// current position.func ( *Decoder) ( string, ...any) error { := errors.New(, ...) 
, := .Position(len(.orig) - len(.in))returnerrors.New("syntax error (line %d:%d): %v", , , )}// Position returns line and column number of given index of the original input.// It will panic if index is out of range.func ( *Decoder) ( int) ( int, int) { := .orig[:] = bytes.Count(, []byte("\n")) + 1if := bytes.LastIndexByte(, '\n'); >= 0 { = [+1:] } = utf8.RuneCount() + 1// ignore multi-rune charactersreturn , }func ( *Decoder) ( byte) bool {iflen(.in) > 0 && .in[0] == { .consume(1)returntrue }returnfalse}// consume consumes n bytes of input and any subsequent whitespace or comments.func ( *Decoder) ( int) { .in = consume(.in, )return}// consume consumes n bytes of input and any subsequent whitespace or comments.func consume( []byte, int) []byte { = [:]forlen() > 0 {switch [0] {case' ', '\n', '\r', '\t': = [1:]case'#':if := bytes.IndexByte(, '\n'); >= 0 { = [+len("\n"):] } else { = nil }default:return } }return}// errId extracts a byte sequence that looks like an invalid ID// (for the purposes of error reporting).func errId( []byte) []byte {const = 32for := 0; < len(); {if > {returnappend([::], "…"...) } , := utf8.DecodeRune([:])if > utf8.RuneSelf || ( != '/' && isDelim(byte())) {if == 0 {// Either the first byte is invalid UTF-8 or a // delimiter, or the first rune is non-ASCII. // Return it as-is. = }return [::] } += }// No delimiter found.return}// isDelim returns true if given byte is a delimiter character.func isDelim( byte) bool {return !( == '-' || == '+' || == '.' || == '_' || ('a' <= && <= 'z') || ('A' <= && <= 'Z') || ('0' <= && <= '9'))}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.