package jsonparser

import (
	"bytes"
	"unicode/utf8"
)

// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7

// Code-point boundaries used when decoding \uXXXX escapes and UTF-16 surrogate pairs.
const (
	// supplementalPlanesOffset is added back when a surrogate pair is combined
	// into a single code point.
	supplementalPlanesOffset = 0x10000

	// highSurrogateOffset is the lower bound of the surrogate range and the
	// value subtracted from a high surrogate when combining a pair.
	highSurrogateOffset = 0xD800

	// lowSurrogateOffset is the smallest low surrogate and the value subtracted
	// from a low surrogate when combining a pair.
	lowSurrogateOffset = 0xDC00

	// basicMultilingualPlaneReservedOffset is the upper bound of the surrogate range.
	basicMultilingualPlaneReservedOffset = 0xDFFF

	// basicMultilingualPlaneOffset is the largest code point a single \uXXXX
	// escape is accepted for.
	basicMultilingualPlaneOffset = 0xFFFF
)

func combineUTF16Surrogates(,  rune) rune {
	return supplementalPlanesOffset + (-highSurrogateOffset)<<10 + ( - lowSurrogateOffset)
}

// badHex is the value h2I returns for a byte that is not a hexadecimal digit.
const badHex = -1

// h2I converts one ASCII hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
// numeric value in the range 0..15. Any other byte yields badHex.
func h2I(c byte) int {
	switch {
	case c >= '0' && c <= '9':
		return int(c - '0')
	case c >= 'A' && c <= 'F':
		return int(c - 'A' + 10)
	case c >= 'a' && c <= 'f':
		return int(c - 'a' + 10)
	}
	return badHex
}

// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and
// is not checked.
// In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together.
// This function only handles one; decodeUnicodeEscape handles this more complex case.
func decodeSingleUnicodeEscape( []byte) (rune, bool) {
	// We need at least 6 characters total
	if len() < 6 {
		return utf8.RuneError, false
	}

	// Convert hex to decimal
	, , ,  := h2I([2]), h2I([3]), h2I([4]), h2I([5])
	if  == badHex ||  == badHex ||  == badHex ||  == badHex {
		return utf8.RuneError, false
	}

	// Compose the hex digits
	return rune(<<12 + <<8 + <<4 + ), true
}

// isUTF16EncodedRune checks if a rune is in the range for non-BMP characters,
// which is used to describe UTF16 chars.
// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
func isUTF16EncodedRune( rune) bool {
	return highSurrogateOffset <=  &&  <= basicMultilingualPlaneReservedOffset
}

func decodeUnicodeEscape( []byte) (rune, int) {
	if ,  := decodeSingleUnicodeEscape(); ! {
		// Invalid Unicode escape
		return utf8.RuneError, -1
	} else if  <= basicMultilingualPlaneOffset && !isUTF16EncodedRune() {
		// Valid Unicode escape in Basic Multilingual Plane
		return , 6
	} else if ,  := decodeSingleUnicodeEscape([6:]); ! { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain
		// UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate"
		return utf8.RuneError, -1
	} else if  < lowSurrogateOffset {
		// Invalid UTF16 "low surrogate"
		return utf8.RuneError, -1
	} else {
		// Valid UTF16 surrogate pair
		return combineUTF16Surrogates(, ), 12
	}
}

// backslashCharEscapeTable maps the byte X of a two-character escape '\X' to
// the byte that replaces the whole escape. The array is indexed by X itself, so
// its length is 't'+1; slots for bytes that are not listed hold zero.
var backslashCharEscapeTable = [...]byte{
	'"':  '"',  // quotation mark
	'/':  '/',  // solidus
	'\\': '\\', // reverse solidus
	'b':  '\b', // backspace
	'f':  '\f', // form feed
	'n':  '\n', // line feed
	'r':  '\r', // carriage return
	't':  '\t', // horizontal tab
}

// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns
// how many characters were consumed from 'in' and emitted into 'out'.
// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error.
func unescapeToUTF8(,  []byte) ( int,  int) {
	if len() < 2 || [0] != '\\' {
		// Invalid escape due to insufficient characters for any escape or no initial backslash
		return -1, -1
	}

	// https://tools.ietf.org/html/rfc7159#section-7
	switch  := [1];  {
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		// Valid basic 2-character escapes (use lookup table)
		[0] = backslashCharEscapeTable[]
		return 2, 1
	case 'u':
		// Unicode escape
		if ,  := decodeUnicodeEscape();  == -1 {
			// Invalid Unicode escape
			return -1, -1
		} else {
			// Valid Unicode escape; re-encode as UTF8
			 := utf8.EncodeRune(, )
			return , 
		}
	}

	return -1, -1
}

// unescape unescapes the string contained in 'in' and returns it as a slice.
// If 'in' contains no escaped characters:
//   Returns 'in'.
// Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)):
//   'out' is used to build the unescaped string and is returned with no extra allocation
// Else:
//   A new slice is allocated and returned.
func (,  []byte) ([]byte, error) {
	 := bytes.IndexByte(, '\\')
	if  == -1 {
		return , nil
	}

	// Get a buffer of sufficient size (allocate if needed)
	if cap() < len() {
		 = make([]byte, len())
	} else {
		 = [0:len()]
	}

	// Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice)
	copy(, [:])
	 = [:]
	 := [:]

	for len() > 0 {
		// Unescape the next escaped character
		,  := unescapeToUTF8(, )
		if  == -1 {
			return nil, MalformedStringEscapeError
		}

		 = [:]
		 = [:]

		// Copy everything up until the next backslash
		 := bytes.IndexByte(, '\\')
		if  == -1 {
			copy(, )
			 = [len():]
			break
		} else {
			copy(, [:])
			 = [:]
			 = [:]
		}
	}

	// Trim the out buffer to the amount that was actually emitted
	return [:len()-len()], nil
}