package jsonparser

import (
	"bytes"
	"unicode/utf8"
)

// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7

// supplementalPlanesOffset is the first code point outside the Basic
// Multilingual Plane; surrogate pairs encode an offset from this value.
const supplementalPlanesOffset = 0x10000

// highSurrogateOffset is the first UTF16 high (leading) surrogate.
const highSurrogateOffset = 0xD800

// lowSurrogateOffset is the first UTF16 low (trailing) surrogate.
const lowSurrogateOffset = 0xDC00

// basicMultilingualPlaneReservedOffset is the last code point reserved for
// UTF16 surrogates (i.e. the last low surrogate).
const basicMultilingualPlaneReservedOffset = 0xDFFF

// basicMultilingualPlaneOffset is the last code point of the Basic
// Multilingual Plane.
const basicMultilingualPlaneOffset = 0xFFFF

// combineUTF16Surrogates merges a UTF16 high/low surrogate pair into the code
// point it encodes. Both arguments are assumed to already be validated as
// being in their respective surrogate ranges.
func combineUTF16Surrogates(high, low rune) rune {
	return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset)
}

// badHex is returned by h2I for a byte that is not a hexadecimal digit.
const badHex = -1

// h2I converts one ASCII hexadecimal digit (upper or lower case) to its
// numeric value 0-15. Any other byte yields badHex.
func h2I(c byte) int {
	switch {
	case c >= '0' && c <= '9':
		return int(c - '0')
	case c >= 'A' && c <= 'F':
		return int(c - 'A' + 10)
	case c >= 'a' && c <= 'f':
		return int(c - 'a' + 10)
	}
	return badHex
}

// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and
// is not checked.
// In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together.
// This function only handles one; decodeUnicodeEscape handles this more complex case.
//
// It returns (utf8.RuneError, false) when fewer than 6 bytes are available or
// any of the four digits is not hexadecimal.
func decodeSingleUnicodeEscape(in []byte) (rune, bool) {
	// We need at least 6 characters total
	if len(in) < 6 {
		return utf8.RuneError, false
	}

	// Convert hex to decimal
	h1, h2, h3, h4 := h2I(in[2]), h2I(in[3]), h2I(in[4]), h2I(in[5])
	if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex {
		return utf8.RuneError, false
	}

	// Compose the hex digits
	return rune(h1<<12 + h2<<8 + h3<<4 + h4), true
}

// isUTF16EncodedRune reports whether r lies in the range reserved for UTF16
// surrogates (0xD800-0xDFFF), i.e. whether it is one half of a surrogate pair
// rather than a character in its own right.
// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
func isUTF16EncodedRune(r rune) bool {
	return highSurrogateOffset <= r && r <= basicMultilingualPlaneReservedOffset
}

// decodeUnicodeEscape decodes the \uXXXX escape at the start of in, consuming
// a second \uXXXX escape as well when the first is a UTF16 high surrogate.
// The leading "\u" of the first escape is assumed to be present (see
// decodeSingleUnicodeEscape).
//
// It returns the decoded rune and the number of input bytes consumed (6 or
// 12), or (utf8.RuneError, -1) if the input is not a valid escape or not a
// valid surrogate pair.
func decodeUnicodeEscape(in []byte) (rune, int) {
	r, ok := decodeSingleUnicodeEscape(in)
	if !ok {
		// Invalid Unicode escape
		return utf8.RuneError, -1
	}
	if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) {
		// Valid Unicode escape in Basic Multilingual Plane
		return r, 6
	}
	if r >= lowSurrogateOffset {
		// A pair must start with a high surrogate; a leading low surrogate
		// cannot be combined with anything.
		return utf8.RuneError, -1
	}

	// The success above guarantees len(in) >= 6, so this slice is in range.
	rest := in[6:]
	// decodeSingleUnicodeEscape does not look at the prefix, so make sure the
	// low half really is a "\u" escape and not six arbitrary bytes.
	if len(rest) < 2 || rest[0] != '\\' || rest[1] != 'u' {
		// UTF16 "high surrogate" without mandatory following Unicode escape
		return utf8.RuneError, -1
	}
	low, ok := decodeSingleUnicodeEscape(rest)
	if !ok {
		// UTF16 "high surrogate" without mandatory valid following Unicode escape for the "low surrogate"
		return utf8.RuneError, -1
	}
	if low < lowSurrogateOffset || low > basicMultilingualPlaneReservedOffset {
		// Invalid UTF16 "low surrogate"
		return utf8.RuneError, -1
	}

	// Valid UTF16 surrogate pair
	return combineUTF16Surrogates(r, low), 12
}

// backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X]
var backslashCharEscapeTable = [...]byte{
	'"':  '"',
	'\\': '\\',
	'/':  '/',
	'b':  '\b',
	'f':  '\f',
	'n':  '\n',
	'r':  '\r',
	't':  '\t',
}

// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns
// how many characters were consumed from 'in' and emitted into 'out'.
// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error.
//
// 'out' must have room for the emitted bytes (1 for a basic escape, up to
// utf8.UTFMax for a Unicode escape); this function does not check its length.
func unescapeToUTF8(in, out []byte) (inLen int, outLen int) {
	if len(in) < 2 || in[0] != '\\' {
		// Invalid escape due to insufficient characters for any escape or no initial backslash
		return -1, -1
	}

	// https://tools.ietf.org/html/rfc7159#section-7
	switch e := in[1]; e {
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		// Valid basic 2-character escapes (use lookup table)
		out[0] = backslashCharEscapeTable[e]
		return 2, 1
	case 'u':
		// Unicode escape
		r, consumed := decodeUnicodeEscape(in)
		if consumed == -1 {
			// Invalid Unicode escape
			return -1, -1
		}
		// Valid Unicode escape; re-encode as UTF8
		return consumed, utf8.EncodeRune(out, r)
	}

	return -1, -1
}

// Unescape unescapes the string contained in 'in' and returns it as a slice.
// If 'in' contains no escaped characters:
//
//	Returns 'in'.
//
// Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)):
//
//	'out' is used to build the unescaped string and is returned with no extra allocation
//
// Else:
//
//	A new slice is allocated and returned.
//
// On an invalid escape sequence it returns (nil, MalformedStringEscapeError);
// that error value is declared elsewhere in the package.
//
// NOTE(review): the function identifier was missing from the extracted
// source; the exported name Unescape is assumed here. Confirm against callers.
func Unescape(in, out []byte) ([]byte, error) {
	firstBackslash := bytes.IndexByte(in, '\\')
	if firstBackslash == -1 {
		return in, nil
	}

	// Get a buffer of sufficient size (allocate if needed)
	if cap(out) < len(in) {
		out = make([]byte, len(in))
	} else {
		out = out[0:len(in)]
	}

	// Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice)
	copy(out, in[:firstBackslash])
	in = in[firstBackslash:]
	buf := out[firstBackslash:]

	for len(in) > 0 {
		// Unescape the next escaped character
		inLen, bufLen := unescapeToUTF8(in, buf)
		if inLen == -1 {
			return nil, MalformedStringEscapeError
		}

		in = in[inLen:]
		buf = buf[bufLen:]

		// Copy everything up until the next backslash
		nextBackslash := bytes.IndexByte(in, '\\')
		if nextBackslash == -1 {
			copy(buf, in)
			buf = buf[len(in):]
			break
		}
		copy(buf, in[:nextBackslash])
		buf = buf[nextBackslash:]
		in = in[nextBackslash:]
	}

	// Trim the out buffer to the amount that was actually emitted
	return out[:len(out)-len(buf)], nil
}
// The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PRs and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.