// 
// Copyright (c) 2011-2019 Canonical Ltd
// Copyright (c) 2006-2010 Kirill Simonov
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
// 
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

package yaml

import (
	
)

// yaml_parser_set_reader_error records a reader error and returns false.
//
// It sets the error field to yaml_READER_ERROR and assigns the problem,
// problem_offset and problem_value fields. The function always returns
// false, so callers can report an error and fail in a single statement:
//
//	return yaml_parser_set_reader_error(...)
//
// NOTE(review): the parameter names and the right-hand sides of the
// assignments are not visible in this copy of the file. Presumably the
// string and the two int parameters are stored, in order, into problem,
// problem_offset and problem_value - confirm against the upstream source.
func yaml_parser_set_reader_error( *yaml_parser_t,  string,  int,  int) bool {
	.error = yaml_READER_ERROR
	.problem = 
	.problem_offset = 
	.problem_value = 
	return false
}

// Byte order marks (BOM), stored as raw byte sequences.
//
// yaml_parser_determine_encoding compares the first bytes of the input
// against these values, one byte at a time, to select the stream encoding.
const (
	// UTF-8 BOM (3 bytes).
	bom_UTF8    = "\xef\xbb\xbf"
	// UTF-16 little-endian BOM (2 bytes).
	bom_UTF16LE = "\xff\xfe"
	// UTF-16 big-endian BOM (2 bytes).
	bom_UTF16BE = "\xfe\xff"
)

// yaml_parser_determine_encoding determines the input stream encoding by
// checking for a byte order mark (BOM) at the current raw buffer position.
//
// The raw buffer is first refilled until at least 3 unconsumed bytes are
// available or EOF has been reached. A UTF-16LE or UTF-16BE BOM (2 bytes) or
// a UTF-8 BOM (3 bytes) selects the matching encoding and is skipped by
// advancing both raw_buffer_pos and offset past it. If no BOM is found, the
// UTF-8 encoding is assumed and no input is consumed.
//
// Returns true on success, or false if refilling the raw buffer failed.
func yaml_parser_determine_encoding( *yaml_parser_t) bool {
	// Ensure that we had enough bytes in the raw buffer.
	// Three bytes is the length of the longest BOM checked below (UTF-8).
	// The loop also stops at EOF, so shorter inputs are handled by the
	// explicit length guards in the comparisons that follow.
	for !.eof && len(.raw_buffer)-.raw_buffer_pos < 3 {
		if !yaml_parser_update_raw_buffer() {
			return false
		}
	}

	// Determine the encoding.
	// The three locals below are the raw buffer, the current position in it,
	// and the number of bytes available from that position (len - pos).
	 := .raw_buffer
	 := .raw_buffer_pos
	 := len() - 
	if  >= 2 && [] == bom_UTF16LE[0] && [+1] == bom_UTF16LE[1] {
		.encoding = yaml_UTF16LE_ENCODING
		.raw_buffer_pos += 2
		.offset += 2
	} else if  >= 2 && [] == bom_UTF16BE[0] && [+1] == bom_UTF16BE[1] {
		.encoding = yaml_UTF16BE_ENCODING
		.raw_buffer_pos += 2
		.offset += 2
	} else if  >= 3 && [] == bom_UTF8[0] && [+1] == bom_UTF8[1] && [+2] == bom_UTF8[2] {
		.encoding = yaml_UTF8_ENCODING
		.raw_buffer_pos += 3
		.offset += 3
	} else {
		// No BOM: default to UTF-8 without consuming any input.
		.encoding = yaml_UTF8_ENCODING
	}
	return true
}

// yaml_parser_update_raw_buffer refills the raw (undecoded) input buffer by
// calling the read handler.
//
// The function is a no-op returning true when the raw buffer is already full
// (nothing consumed and len == cap) or when EOF has been seen. Otherwise the
// unconsumed bytes are moved to the front of the buffer, raw_buffer_pos is
// reset to 0, and the read handler is asked to fill the free space between
// len and cap.
//
// If the read handler reports io.EOF, the eof flag is set and true is
// returned. Any other error is recorded through yaml_parser_set_reader_error
// with the message "input error: " followed by the error text, and false is
// returned.
func yaml_parser_update_raw_buffer( *yaml_parser_t) bool {
	 := 0

	// Return if the raw buffer is full.
	if .raw_buffer_pos == 0 && len(.raw_buffer) == cap(.raw_buffer) {
		return true
	}

	// Return on EOF.
	if .eof {
		return true
	}

	// Move the remaining bytes in the raw buffer to the beginning.
	// No copy is needed when nothing has been consumed (pos == 0) or when
	// everything has been consumed (pos == len); the reslice below handles
	// both cases, yielding the unchanged buffer or an empty one.
	if .raw_buffer_pos > 0 && .raw_buffer_pos < len(.raw_buffer) {
		copy(.raw_buffer, .raw_buffer[.raw_buffer_pos:])
	}
	.raw_buffer = .raw_buffer[:len(.raw_buffer)-.raw_buffer_pos]
	.raw_buffer_pos = 0

	// Call the read handler to fill the buffer.
	// The handler is given the unused tail of the buffer (len..cap) and
	// returns a count and an error. The buffer is extended by the returned
	// count before the error is inspected, so bytes delivered together with
	// an error (including io.EOF) are kept.
	,  := .read_handler(, .raw_buffer[len(.raw_buffer):cap(.raw_buffer)])
	.raw_buffer = .raw_buffer[:len(.raw_buffer)+]
	if  == io.EOF {
		.eof = true
	} else if  != nil {
		return yaml_parser_set_reader_error(, "input error: "+.Error(), .offset, -1)
	}
	return true
}

// yaml_parser_update_buffer ensures that the decoded buffer contains at
// least the requested number of characters.
// Returns true on success, false on failure.
//
// The requested length is supposed to be significantly less than the buffer
// size.
//
// The function reads raw bytes through yaml_parser_update_raw_buffer, decodes
// them according to the stream encoding (UTF-8, UTF-16LE or UTF-16BE; the
// encoding is detected on first use), validates each character, and writes it
// into the buffer re-encoded as UTF-8. The unread field counts characters,
// not bytes. At EOF a NUL byte is appended and the remainder of the requested
// length is padded with zero bytes.
//
// It panics if no read handler has been set. It returns false, with the error
// recorded via yaml_parser_set_reader_error, when reading fails or when the
// input contains an invalid, incomplete or disallowed character.
//
// NOTE(review): parameter, local variable and label names are not visible in
// this copy of the file; comments below that refer to them are inferred from
// the surrounding code and should be confirmed against the upstream source.
func yaml_parser_update_buffer( *yaml_parser_t,  int) bool {
	if .read_handler == nil {
		panic("read handler must be set")
	}

	// [Go] This function was changed to guarantee the requested length size at EOF.
	// The fact we need to do this is pretty awful, but the description above implies
	// for that to be the case, and there are tests

	// If the EOF flag is set and the raw buffer is empty, do nothing.
	if .eof && .raw_buffer_pos == len(.raw_buffer) {
		// [Go] ACTUALLY! Read the documentation of this function above.
		// This is just broken. To return true, we need to have the
		// given length in the buffer. Not doing that means every single
		// check that calls this function to make sure the buffer has a
		// given length is either panicking (in Go) or accessing invalid
		// memory (in C). The early return is therefore intentionally
		// disabled and this branch is empty.
		//return true
	}

	// Return if the buffer contains enough characters.
	if .unread >=  {
		return true
	}

	// Determine the input encoding if it is not known yet.
	if .encoding == yaml_ANY_ENCODING {
		if !yaml_parser_determine_encoding() {
			return false
		}
	}

	// Move the unread characters to the beginning of the buffer.
	// The local initialised here starts as the current buffer length and is
	// adjusted so that, after compaction, it reflects the number of bytes
	// still held in the buffer (presumably it is then used as the write
	// position for newly decoded bytes - confirm).
	 := len(.buffer)
	if .buffer_pos > 0 && .buffer_pos <  {
		copy(.buffer, .buffer[.buffer_pos:])
		 -= .buffer_pos
		.buffer_pos = 0
	} else if .buffer_pos ==  {
		// Everything has been consumed: restart from an empty buffer.
		 = 0
		.buffer_pos = 0
	}

	// Open the whole buffer for writing, and cut it before returning.
	.buffer = .buffer[:cap(.buffer)]

	// Fill the buffer until it has enough characters.
	// The flag below is true only for the first pass of the loop: on that
	// pass the raw buffer is refilled only if it has been fully consumed,
	// while on every later pass it is refilled unconditionally.
	 := true
	for .unread <  {

		// Fill the raw buffer if necessary.
		if ! || .raw_buffer_pos == len(.raw_buffer) {
			if !yaml_parser_update_raw_buffer() {
				// Cut the buffer back before reporting the failure
				// (presumably to the bytes written so far - confirm).
				.buffer = .buffer[:]
				return false
			}
		}
		 = false

		// Decode the raw buffer.
		// The label on the next line lets the "incomplete character"
		// cases below leave this inner loop so that the outer loop can
		// fetch more raw input.
	:
		for .raw_buffer_pos != len(.raw_buffer) {
			var  rune
			var  int

			// Number of raw bytes still available from the current position.
			 := len(.raw_buffer) - .raw_buffer_pos

			// Decode the next character.
			switch .encoding {
			case yaml_UTF8_ENCODING:
				// Decode a UTF-8 character.  Check RFC 3629
				// (http://www.ietf.org/rfc/rfc3629.txt) for more details.
				//
				// The following table (taken from the RFC) is used for
				// decoding.
				//
				//    Char. number range |        UTF-8 octet sequence
				//      (hexadecimal)    |              (binary)
				//   --------------------+------------------------------------
				//   0000 0000-0000 007F | 0xxxxxxx
				//   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
				//   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
				//   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
				//
				// Additionally, the characters in the range 0xD800-0xDFFF
				// are prohibited as they are reserved for use with UTF-16
				// surrogate pairs.

				// Determine the length of the UTF-8 sequence.
				 := .raw_buffer[.raw_buffer_pos]
				switch {
				case &0x80 == 0x00:
					 = 1
				case &0xE0 == 0xC0:
					 = 2
				case &0xF0 == 0xE0:
					 = 3
				case &0xF8 == 0xF0:
					 = 4
				default:
					// The leading octet is invalid.
					return yaml_parser_set_reader_error(,
						"invalid leading UTF-8 octet",
						.offset, int())
				}

				// Check if the raw buffer contains an incomplete character.
				// At EOF this is an error; otherwise leave the decode loop
				// so that more raw input can be read.
				if  >  {
					if .eof {
						return yaml_parser_set_reader_error(,
							"incomplete UTF-8 octet sequence",
							.offset, -1)
					}
					break 
				}

				// Decode the leading octet.
				switch {
				case &0x80 == 0x00:
					 = rune( & 0x7F)
				case &0xE0 == 0xC0:
					 = rune( & 0x1F)
				case &0xF0 == 0xE0:
					 = rune( & 0x0F)
				case &0xF8 == 0xF0:
					 = rune( & 0x07)
				default:
					 = 0
				}

				// Check and decode the trailing octets.
				for  := 1;  < ; ++ {
					 = .raw_buffer[.raw_buffer_pos+]

					// Check if the octet is valid.
					// Every trailing octet must have the form 10xxxxxx.
					if ( & 0xC0) != 0x80 {
						return yaml_parser_set_reader_error(,
							"invalid trailing UTF-8 octet",
							.offset+, int())
					}

					// Decode the octet.
					// Each trailing octet contributes its low 6 bits.
					 = ( << 6) + rune(&0x3F)
				}

				// Check the length of the sequence against the value.
				// The thresholds are the lower bounds of the 2-, 3- and
				// 4-octet rows of the table above, so a value encoded with
				// more octets than necessary is rejected.
				switch {
				case  == 1:
				case  == 2 &&  >= 0x80:
				case  == 3 &&  >= 0x800:
				case  == 4 &&  >= 0x10000:
				default:
					return yaml_parser_set_reader_error(,
						"invalid length of a UTF-8 sequence",
						.offset, -1)
				}

				// Check the range of the value.
				// Surrogate code points and values above 0x10FFFF are
				// rejected.
				if  >= 0xD800 &&  <= 0xDFFF ||  > 0x10FFFF {
					return yaml_parser_set_reader_error(,
						"invalid Unicode character",
						.offset, int())
				}

			case yaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING:
				// Two byte offsets within each 16-bit unit; their values are
				// swapped between little-endian (0, 1) and big-endian (1, 0).
				// Presumably the first is the low byte and the second the
				// high byte - confirm.
				var ,  int
				if .encoding == yaml_UTF16LE_ENCODING {
					,  = 0, 1
				} else {
					,  = 1, 0
				}

				// The UTF-16 encoding is not as simple as one might
				// naively think.  Check RFC 2781
				// (http://www.ietf.org/rfc/rfc2781.txt).
				//
				// Normally, two subsequent bytes describe a Unicode
				// character.  However a special technique (called a
				// surrogate pair) is used for specifying character
				// values larger than 0xFFFF.
				//
				// A surrogate pair consists of two pseudo-characters:
				//      high surrogate area (0xD800-0xDBFF)
				//      low surrogate area (0xDC00-0xDFFF)
				//
				// The following formulas are used for decoding
				// and encoding characters using surrogate pairs:
				//
				//  U  = U' + 0x10000   (0x01 00 00 <= U <= 0x10 FF FF)
				//  U' = yyyyyyyyyyxxxxxxxxxx   (0 <= U' <= 0x0F FF FF)
				//  W1 = 110110yyyyyyyyyy
				//  W2 = 110111xxxxxxxxxx
				//
				// where U is the character value, W1 is the high surrogate
				// area, W2 is the low surrogate area.

				// Check for incomplete UTF-16 character.
				if  < 2 {
					if .eof {
						return yaml_parser_set_reader_error(,
							"incomplete UTF-16 character",
							.offset, -1)
					}
					break 
				}

				// Get the character.
				// One byte is taken as is and the other is shifted left by
				// 8 bits to form the 16-bit unit.
				 = rune(.raw_buffer[.raw_buffer_pos+]) +
					(rune(.raw_buffer[.raw_buffer_pos+]) << 8)

				// Check for unexpected low surrogate area.
				// A low surrogate may only appear as the second half of a
				// pair, never first.
				if &0xFC00 == 0xDC00 {
					return yaml_parser_set_reader_error(,
						"unexpected low surrogate area",
						.offset, int())
				}

				// Check for a high surrogate area.
				if &0xFC00 == 0xD800 {
					 = 4

					// Check for incomplete surrogate pair.
					if  < 4 {
						if .eof {
							return yaml_parser_set_reader_error(,
								"incomplete UTF-16 surrogate pair",
								.offset, -1)
						}
						break 
					}

					// Get the next character.
					// Same byte layout as above, two bytes further on.
					 := rune(.raw_buffer[.raw_buffer_pos++2]) +
						(rune(.raw_buffer[.raw_buffer_pos++2]) << 8)

					// Check for a low surrogate area.
					if &0xFC00 != 0xDC00 {
						return yaml_parser_set_reader_error(,
							"expected low surrogate area",
							.offset+2, int())
					}

					// Generate the value of the surrogate pair.
					// This follows the formula above: ten bits from each
					// half, plus 0x10000.
					 = 0x10000 + (( & 0x3FF) << 10) + ( & 0x3FF)
				} else {
					 = 2
				}

			default:
				// The encoding has been resolved earlier in this function,
				// so any other value is treated as a programming error.
				panic("impossible")
			}

			// Check if the character is in the allowed range:
			//      #x9 | #xA | #xD | [#x20-#x7E]               (8 bit)
			//      | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD]    (16 bit)
			//      | [#x10000-#x10FFFF]                        (32 bit)
			switch {
			case  == 0x09:
			case  == 0x0A:
			case  == 0x0D:
			case  >= 0x20 &&  <= 0x7E:
			case  == 0x85:
			case  >= 0xA0 &&  <= 0xD7FF:
			case  >= 0xE000 &&  <= 0xFFFD:
			case  >= 0x10000 &&  <= 0x10FFFF:
			default:
				return yaml_parser_set_reader_error(,
					"control characters are not allowed",
					.offset, int())
			}

			// Move the raw pointers.
			.raw_buffer_pos += 
			.offset += 

			// Finally put the character into the buffer.
			// The output is always UTF-8, whatever the input encoding was.
			if  <= 0x7F {
				// 0000 0000-0000 007F . 0xxxxxxx
				.buffer[+0] = byte()
				 += 1
			} else if  <= 0x7FF {
				// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx
				.buffer[+0] = byte(0xC0 + ( >> 6))
				.buffer[+1] = byte(0x80 + ( & 0x3F))
				 += 2
			} else if  <= 0xFFFF {
				// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx
				.buffer[+0] = byte(0xE0 + ( >> 12))
				.buffer[+1] = byte(0x80 + (( >> 6) & 0x3F))
				.buffer[+2] = byte(0x80 + ( & 0x3F))
				 += 3
			} else {
				// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
				.buffer[+0] = byte(0xF0 + ( >> 18))
				.buffer[+1] = byte(0x80 + (( >> 12) & 0x3F))
				.buffer[+2] = byte(0x80 + (( >> 6) & 0x3F))
				.buffer[+3] = byte(0x80 + ( & 0x3F))
				 += 4
			}

			// unread counts characters: it grows by one per decoded
			// character regardless of how many bytes were written above.
			.unread++
		}

		// On EOF, put NUL into the buffer and return.
		// The NUL is counted as one unread character, and the fill loop is
		// left even if fewer characters than requested are available.
		if .eof {
			.buffer[] = 0
			++
			.unread++
			break
		}
	}
	// [Go] Read the documentation of this function above. To return true,
	// we need to have the given length in the buffer. Not doing that means
	// every single check that calls this function to make sure the buffer
	// has a given length is either panicking (in Go) or accessing invalid
	// memory (in C).
	// This happens here due to the EOF above breaking early, so the buffer
	// is padded with zero bytes.
	for  <  {
		.buffer[] = 0
		++
	}
	// Cut the buffer back (it was opened to full capacity above).
	.buffer = .buffer[:]
	return true
}