// // Copyright (c) 2011-2019 Canonical Ltd// Copyright (c) 2006-2010 Kirill Simonov// // Permission is hereby granted, free of charge, to any person obtaining a copy of// this software and associated documentation files (the "Software"), to deal in// the Software without restriction, including without limitation the rights to// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies// of the Software, and to permit persons to whom the Software is furnished to do// so, subject to the following conditions:// // The above copyright notice and this permission notice shall be included in all// copies or substantial portions of the Software.// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE// SOFTWARE.package yamlimport ()// Set the reader error and return 0.func yaml_parser_set_reader_error( *yaml_parser_t, string, int, int) bool { .error = yaml_READER_ERROR .problem = .problem_offset = .problem_value = returnfalse}// Byte order marks.const ( bom_UTF8 = "\xef\xbb\xbf" bom_UTF16LE = "\xff\xfe" bom_UTF16BE = "\xfe\xff")// Determine the input stream encoding by checking the BOM symbol. If no BOM is// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure.func yaml_parser_determine_encoding( *yaml_parser_t) bool {// Ensure that we had enough bytes in the raw buffer.for !.eof && len(.raw_buffer)-.raw_buffer_pos < 3 {if !yaml_parser_update_raw_buffer() {returnfalse } }// Determine the encoding. 
:= .raw_buffer := .raw_buffer_pos := len() - if >= 2 && [] == bom_UTF16LE[0] && [+1] == bom_UTF16LE[1] { .encoding = yaml_UTF16LE_ENCODING .raw_buffer_pos += 2 .offset += 2 } elseif >= 2 && [] == bom_UTF16BE[0] && [+1] == bom_UTF16BE[1] { .encoding = yaml_UTF16BE_ENCODING .raw_buffer_pos += 2 .offset += 2 } elseif >= 3 && [] == bom_UTF8[0] && [+1] == bom_UTF8[1] && [+2] == bom_UTF8[2] { .encoding = yaml_UTF8_ENCODING .raw_buffer_pos += 3 .offset += 3 } else { .encoding = yaml_UTF8_ENCODING }returntrue}// Update the raw buffer.func yaml_parser_update_raw_buffer( *yaml_parser_t) bool { := 0// Return if the raw buffer is full.if .raw_buffer_pos == 0 && len(.raw_buffer) == cap(.raw_buffer) {returntrue }// Return on EOF.if .eof {returntrue }// Move the remaining bytes in the raw buffer to the beginning.if .raw_buffer_pos > 0 && .raw_buffer_pos < len(.raw_buffer) {copy(.raw_buffer, .raw_buffer[.raw_buffer_pos:]) } .raw_buffer = .raw_buffer[:len(.raw_buffer)-.raw_buffer_pos] .raw_buffer_pos = 0// Call the read handler to fill the buffer. , := .read_handler(, .raw_buffer[len(.raw_buffer):cap(.raw_buffer)]) .raw_buffer = .raw_buffer[:len(.raw_buffer)+]if == io.EOF { .eof = true } elseif != nil {returnyaml_parser_set_reader_error(, "input error: "+.Error(), .offset, -1) }returntrue}// Ensure that the buffer contains at least `length` characters.// Return true on success, false on failure.//// The length is supposed to be significantly less that the buffer size.func yaml_parser_update_buffer( *yaml_parser_t, int) bool {if .read_handler == nil {panic("read handler must be set") }// [Go] This function was changed to guarantee the requested length size at EOF. // The fact we need to do this is pretty awful, but the description above implies // for that to be the case, and there are tests// If the EOF flag is set and the raw buffer is empty, do nothing.if .eof && .raw_buffer_pos == len(.raw_buffer) {// [Go] ACTUALLY! Read the documentation of this function above. 
// This is just broken. To return true, we need to have the // given length in the buffer. Not doing that means every single // check that calls this function to make sure the buffer has a // given length is Go) panicking; or C) accessing invalid memory. //return true }// Return if the buffer contains enough characters.if .unread >= {returntrue }// Determine the input encoding if it is not known yet.if .encoding == yaml_ANY_ENCODING {if !yaml_parser_determine_encoding() {returnfalse } }// Move the unread characters to the beginning of the buffer. := len(.buffer)if .buffer_pos > 0 && .buffer_pos < {copy(.buffer, .buffer[.buffer_pos:]) -= .buffer_pos .buffer_pos = 0 } elseif .buffer_pos == { = 0 .buffer_pos = 0 }// Open the whole buffer for writing, and cut it before returning. .buffer = .buffer[:cap(.buffer)]// Fill the buffer until it has enough characters. := truefor .unread < {// Fill the raw buffer if necessary.if ! || .raw_buffer_pos == len(.raw_buffer) {if !yaml_parser_update_raw_buffer() { .buffer = .buffer[:]returnfalse } } = false// Decode the raw buffer. :for .raw_buffer_pos != len(.raw_buffer) {varrunevarint := len(.raw_buffer) - .raw_buffer_pos// Decode the next character.switch .encoding {caseyaml_UTF8_ENCODING:// Decode a UTF-8 character. Check RFC 3629 // (http://www.ietf.org/rfc/rfc3629.txt) for more details. // // The following table (taken from the RFC) is used for // decoding. // // Char. number range | UTF-8 octet sequence // (hexadecimal) | (binary) // --------------------+------------------------------------ // 0000 0000-0000 007F | 0xxxxxxx // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // // Additionally, the characters in the range 0xD800-0xDFFF // are prohibited as they are reserved for use with UTF-16 // surrogate pairs.// Determine the length of the UTF-8 sequence. 
:= .raw_buffer[.raw_buffer_pos]switch {case &0x80 == 0x00: = 1case &0xE0 == 0xC0: = 2case &0xF0 == 0xE0: = 3case &0xF8 == 0xF0: = 4default:// The leading octet is invalid.returnyaml_parser_set_reader_error(,"invalid leading UTF-8 octet", .offset, int()) }// Check if the raw buffer contains an incomplete character.if > {if .eof {returnyaml_parser_set_reader_error(,"incomplete UTF-8 octet sequence", .offset, -1) }break }// Decode the leading octet.switch {case &0x80 == 0x00: = rune( & 0x7F)case &0xE0 == 0xC0: = rune( & 0x1F)case &0xF0 == 0xE0: = rune( & 0x0F)case &0xF8 == 0xF0: = rune( & 0x07)default: = 0 }// Check and decode the trailing octets.for := 1; < ; ++ { = .raw_buffer[.raw_buffer_pos+]// Check if the octet is valid.if ( & 0xC0) != 0x80 {returnyaml_parser_set_reader_error(,"invalid trailing UTF-8 octet", .offset+, int()) }// Decode the octet. = ( << 6) + rune(&0x3F) }// Check the length of the sequence against the value.switch {case == 1:case == 2 && >= 0x80:case == 3 && >= 0x800:case == 4 && >= 0x10000:default:returnyaml_parser_set_reader_error(,"invalid length of a UTF-8 sequence", .offset, -1) }// Check the range of the value.if >= 0xD800 && <= 0xDFFF || > 0x10FFFF {returnyaml_parser_set_reader_error(,"invalid Unicode character", .offset, int()) }caseyaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING:var , intif .encoding == yaml_UTF16LE_ENCODING { , = 0, 1 } else { , = 1, 0 }// The UTF-16 encoding is not as simple as one might // naively think. Check RFC 2781 // (http://www.ietf.org/rfc/rfc2781.txt). // // Normally, two subsequent bytes describe a Unicode // character. However a special technique (called a // surrogate pair) is used for specifying character // values larger than 0xFFFF. 
// // A surrogate pair consists of two pseudo-characters: // high surrogate area (0xD800-0xDBFF) // low surrogate area (0xDC00-0xDFFF) // // The following formulas are used for decoding // and encoding characters using surrogate pairs: // // U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF) // U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF) // W1 = 110110yyyyyyyyyy // W2 = 110111xxxxxxxxxx // // where U is the character value, W1 is the high surrogate // area, W2 is the low surrogate area.// Check for incomplete UTF-16 character.if < 2 {if .eof {returnyaml_parser_set_reader_error(,"incomplete UTF-16 character", .offset, -1) }break }// Get the character. = rune(.raw_buffer[.raw_buffer_pos+]) + (rune(.raw_buffer[.raw_buffer_pos+]) << 8)// Check for unexpected low surrogate area.if &0xFC00 == 0xDC00 {returnyaml_parser_set_reader_error(,"unexpected low surrogate area", .offset, int()) }// Check for a high surrogate area.if &0xFC00 == 0xD800 { = 4// Check for incomplete surrogate pair.if < 4 {if .eof {returnyaml_parser_set_reader_error(,"incomplete UTF-16 surrogate pair", .offset, -1) }break }// Get the next character. := rune(.raw_buffer[.raw_buffer_pos++2]) + (rune(.raw_buffer[.raw_buffer_pos++2]) << 8)// Check for a low surrogate area.if &0xFC00 != 0xDC00 {returnyaml_parser_set_reader_error(,"expected low surrogate area", .offset+2, int()) }// Generate the value of the surrogate pair. = 0x10000 + (( & 0x3FF) << 10) + ( & 0x3FF) } else { = 2 }default:panic("impossible") }// Check if the character is in the allowed range: // #x9 | #xA | #xD | [#x20-#x7E] (8 bit) // | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit) // | [#x10000-#x10FFFF] (32 bit)switch {case == 0x09:case == 0x0A:case == 0x0D:case >= 0x20 && <= 0x7E:case == 0x85:case >= 0xA0 && <= 0xD7FF:case >= 0xE000 && <= 0xFFFD:case >= 0x10000 && <= 0x10FFFF:default:returnyaml_parser_set_reader_error(,"control characters are not allowed", .offset, int()) }// Move the raw pointers. 
.raw_buffer_pos += .offset += // Finally put the character into the buffer.if <= 0x7F {// 0000 0000-0000 007F . 0xxxxxxx .buffer[+0] = byte() += 1 } elseif <= 0x7FF {// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx .buffer[+0] = byte(0xC0 + ( >> 6)) .buffer[+1] = byte(0x80 + ( & 0x3F)) += 2 } elseif <= 0xFFFF {// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx .buffer[+0] = byte(0xE0 + ( >> 12)) .buffer[+1] = byte(0x80 + (( >> 6) & 0x3F)) .buffer[+2] = byte(0x80 + ( & 0x3F)) += 3 } else {// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx .buffer[+0] = byte(0xF0 + ( >> 18)) .buffer[+1] = byte(0x80 + (( >> 12) & 0x3F)) .buffer[+2] = byte(0x80 + (( >> 6) & 0x3F)) .buffer[+3] = byte(0x80 + ( & 0x3F)) += 4 } .unread++ }// On EOF, put NUL into the buffer and return.if .eof { .buffer[] = 0 ++ .unread++break } }// [Go] Read the documentation of this function above. To return true, // we need to have the given length in the buffer. Not doing that means // every single check that calls this function to make sure the buffer // has a given length is Go) panicking; or C) accessing invalid memory. // This happens here due to the EOF above breaking early.for < { .buffer[] = 0 ++ } .buffer = .buffer[:]returntrue}
// The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.