// This file's string processing code is inspired by https://github.com/segmentio/encoding. // The license notation is as follows. // // # MIT License // // Copyright (c) 2019 Segment.io, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE.
package encoder

import (
	"math/bits"
	"reflect"
	"unsafe"
)

const (
	// lsb has the lowest bit of every byte set. Multiplying it by a byte value
	// broadcasts that value into all eight byte lanes of a uint64.
	lsb = 0x0101010101010101
	// msb has the highest bit of every byte set. It selects the per-lane flag
	// bits produced by the mask computations in the append*String functions.
	msb = 0x8080808080808080
)

// hex holds the lowercase hexadecimal digits used to build \u00XX escapes.
var hex = "0123456789abcdef"

// stringToUint64Slice reinterprets the bytes of s as a []uint64 without
// copying. The result has len(s)/8 elements, so up to seven trailing bytes of
// s are not covered and must be examined separately by the caller. Words are
// read in the host's native byte order.
//
//nolint:govet
func stringToUint64Slice(s string) []uint64 {
	return *(*[]uint64)(unsafe.Pointer(&reflect.SliceHeader{
		Data: ((*reflect.StringHeader)(unsafe.Pointer(&s))).Data,
		Len:  len(s) / 8,
		Cap:  len(s) / 8,
	}))
}

// AppendString appends s to buf as a double-quoted JSON string and returns the
// extended slice. The escaping variant is chosen from ctx.Option.Flag:
//
//   - HTMLEscapeOption selects the variants that also escape '<', '>' and '&'.
//   - NormalizeUTF8Option selects the variants that run decodeRuneInString on
//     bytes flagged by their lookup table.
func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte {
	if ctx.Option.Flag&HTMLEscapeOption != 0 {
		if ctx.Option.Flag&NormalizeUTF8Option != 0 {
			return appendNormalizedHTMLString(buf, s)
		}
		return appendHTMLString(buf, s)
	}
	if ctx.Option.Flag&NormalizeUTF8Option != 0 {
		return appendNormalizedString(buf, s)
	}
	return appendString(buf, s)
}

// appendNormalizedHTMLString appends s to buf as a quoted JSON string.
//
// It escapes '"', '\\', '\n', '\r', '\t', the remaining bytes below 0x20 (as
// \u00XX) and the HTML-sensitive bytes '<', '>' and '&' (as \u00XX). Any other
// byte flagged by needEscapeHTMLNormalizeUTF8 is passed to decodeRuneInString:
// runeErrorState emits \ufffd for one byte, lineSepState and paragraphSepState
// emit \u2028 / \u2029 for three bytes, and any other state copies the rune
// through unchanged.
func appendNormalizedHTMLString(buf []byte, s string) []byte {
	valLen := len(s)
	if valLen == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')
	var (
		// i is the start of the pending run of bytes not yet copied to buf;
		// j is the scan position.
		i, j int
	)
	if valLen >= 8 {
		chunks := stringToUint64Slice(s)
		for idx, n := range chunks {
			// combine masks before checking for the MSB of each byte. We include
			// `n` in the mask to check whether any of the *input* byte MSBs were
			// set (i.e. the byte was outside the ASCII range).
			mask := n | (n - (lsb * 0x20)) |
				((n ^ (lsb * '"')) - lsb) |
				((n ^ (lsb * '\\')) - lsb) |
				((n ^ (lsb * '<')) - lsb) |
				((n ^ (lsb * '>')) - lsb) |
				((n ^ (lsb * '&')) - lsb)
			if (mask & msb) != 0 {
				// idx*8 turns the chunk index into a byte offset so the slow
				// path resumes at the flagged byte instead of rescanning from
				// the first chunk.
				// NOTE(review): lane-to-byte mapping assumes a little-endian
				// host — confirm for big-endian targets.
				j = idx*8 + bits.TrailingZeros64(mask&msb)/8
				goto ESCAPE_END
			}
		}
		// Examine the trailing bytes that did not fill a whole 8-byte chunk.
		for k := len(chunks) * 8; k < valLen; k++ {
			if needEscapeHTMLNormalizeUTF8[s[k]] {
				j = k
				goto ESCAPE_END
			}
		}
		// no escape characters found: copy the string through verbatim.
		return append(append(buf, s...), '"')
	}
ESCAPE_END:
	for j < valLen {
		c := s[j]

		if !needEscapeHTMLNormalizeUTF8[c] {
			// fast path: most of the time, printable ascii characters are used
			j++
			continue
		}

		switch c {
		case '\\', '"':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', c)
			i = j + 1
			j = j + 1
			continue

		case '\n':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'n')
			i = j + 1
			j = j + 1
			continue

		case '\r':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'r')
			i = j + 1
			j = j + 1
			continue

		case '\t':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 't')
			i = j + 1
			j = j + 1
			continue

		case '<', '>', '&':
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue

		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		}

		state, size := decodeRuneInString(s[j:])
		switch state {
		case runeErrorState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\ufffd`...)
			i = j + 1
			j = j + 1
			continue
			// U+2028 is LINE SEPARATOR.
			// U+2029 is PARAGRAPH SEPARATOR.
			// They are both technically valid characters in JSON strings,
			// but don't work in JSONP, which has to be evaluated as JavaScript,
			// and can lead to security holes there. It is valid JSON to
			// escape them, so we do so unconditionally.
			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
		case lineSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2028`...)
			i = j + 3
			j = j + 3
			continue
		case paragraphSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2029`...)
			i = j + 3
			j = j + 3
			continue
		}
		j += size
	}

	return append(append(buf, s[i:]...), '"')
}

// appendHTMLString appends s to buf as a quoted JSON string.
//
// It escapes '"', '\\', '\n', '\r', '\t', the remaining bytes below 0x20 (as
// \u00XX) and the HTML-sensitive bytes '<', '>' and '&' (as \u00XX). Every
// other byte is copied through as-is; no rune decoding is performed.
func appendHTMLString(buf []byte, s string) []byte {
	valLen := len(s)
	if valLen == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')
	var (
		// i is the start of the pending run of bytes not yet copied to buf;
		// j is the scan position.
		i, j int
	)
	if valLen >= 8 {
		chunks := stringToUint64Slice(s)
		for idx, n := range chunks {
			// combine masks before checking for the MSB of each byte. We include
			// `n` in the mask to check whether any of the *input* byte MSBs were
			// set (i.e. the byte was outside the ASCII range).
			mask := n | (n - (lsb * 0x20)) |
				((n ^ (lsb * '"')) - lsb) |
				((n ^ (lsb * '\\')) - lsb) |
				((n ^ (lsb * '<')) - lsb) |
				((n ^ (lsb * '>')) - lsb) |
				((n ^ (lsb * '&')) - lsb)
			if (mask & msb) != 0 {
				// idx*8 turns the chunk index into a byte offset so the slow
				// path resumes at the flagged byte instead of rescanning from
				// the first chunk.
				// NOTE(review): lane-to-byte mapping assumes a little-endian
				// host — confirm for big-endian targets.
				j = idx*8 + bits.TrailingZeros64(mask&msb)/8
				goto ESCAPE_END
			}
		}
		// Examine the trailing bytes that did not fill a whole 8-byte chunk.
		for k := len(chunks) * 8; k < valLen; k++ {
			if needEscapeHTML[s[k]] {
				j = k
				goto ESCAPE_END
			}
		}
		// no escape characters found: copy the string through verbatim.
		return append(append(buf, s...), '"')
	}
ESCAPE_END:
	for j < valLen {
		c := s[j]

		if !needEscapeHTML[c] {
			// fast path: most of the time, printable ascii characters are used
			j++
			continue
		}

		switch c {
		case '\\', '"':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', c)
			i = j + 1
			j = j + 1
			continue

		case '\n':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'n')
			i = j + 1
			j = j + 1
			continue

		case '\r':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'r')
			i = j + 1
			j = j + 1
			continue

		case '\t':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 't')
			i = j + 1
			j = j + 1
			continue

		case '<', '>', '&':
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue

		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		}
		j++
	}

	return append(append(buf, s[i:]...), '"')
}

// appendNormalizedString appends s to buf as a quoted JSON string.
//
// It escapes '"', '\\', '\n', '\r', '\t' and the remaining bytes below 0x20
// (as \u00XX). Any other byte flagged by needEscapeNormalizeUTF8 is passed to
// decodeRuneInString: runeErrorState emits \ufffd for one byte, lineSepState
// and paragraphSepState emit \u2028 / \u2029 for three bytes, and any other
// state copies the rune through unchanged.
func appendNormalizedString(buf []byte, s string) []byte {
	valLen := len(s)
	if valLen == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')
	var (
		// i is the start of the pending run of bytes not yet copied to buf;
		// j is the scan position.
		i, j int
	)
	if valLen >= 8 {
		chunks := stringToUint64Slice(s)
		for idx, n := range chunks {
			// combine masks before checking for the MSB of each byte. We include
			// `n` in the mask to check whether any of the *input* byte MSBs were
			// set (i.e. the byte was outside the ASCII range).
			mask := n | (n - (lsb * 0x20)) |
				((n ^ (lsb * '"')) - lsb) |
				((n ^ (lsb * '\\')) - lsb)
			if (mask & msb) != 0 {
				// idx*8 turns the chunk index into a byte offset so the slow
				// path resumes at the flagged byte instead of rescanning from
				// the first chunk.
				// NOTE(review): lane-to-byte mapping assumes a little-endian
				// host — confirm for big-endian targets.
				j = idx*8 + bits.TrailingZeros64(mask&msb)/8
				goto ESCAPE_END
			}
		}
		// Examine the trailing bytes that did not fill a whole 8-byte chunk.
		for k := len(chunks) * 8; k < valLen; k++ {
			if needEscapeNormalizeUTF8[s[k]] {
				j = k
				goto ESCAPE_END
			}
		}
		// no escape characters found: copy the string through verbatim.
		return append(append(buf, s...), '"')
	}
ESCAPE_END:
	for j < valLen {
		c := s[j]

		if !needEscapeNormalizeUTF8[c] {
			// fast path: most of the time, printable ascii characters are used
			j++
			continue
		}

		switch c {
		case '\\', '"':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', c)
			i = j + 1
			j = j + 1
			continue

		case '\n':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'n')
			i = j + 1
			j = j + 1
			continue

		case '\r':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'r')
			i = j + 1
			j = j + 1
			continue

		case '\t':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 't')
			i = j + 1
			j = j + 1
			continue

		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		}

		state, size := decodeRuneInString(s[j:])
		switch state {
		case runeErrorState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\ufffd`...)
			i = j + 1
			j = j + 1
			continue
			// U+2028 is LINE SEPARATOR.
			// U+2029 is PARAGRAPH SEPARATOR.
			// They are both technically valid characters in JSON strings,
			// but don't work in JSONP, which has to be evaluated as JavaScript,
			// and can lead to security holes there. It is valid JSON to
			// escape them, so we do so unconditionally.
			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
		case lineSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2028`...)
			i = j + 3
			j = j + 3
			continue
		case paragraphSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2029`...)
			i = j + 3
			j = j + 3
			continue
		}
		j += size
	}

	return append(append(buf, s[i:]...), '"')
}

// appendString appends s to buf as a quoted JSON string.
//
// It escapes '"', '\\', '\n', '\r', '\t' and the remaining bytes below 0x20
// (as \u00XX). Every other byte is copied through as-is; no rune decoding and
// no HTML escaping is performed.
func appendString(buf []byte, s string) []byte {
	valLen := len(s)
	if valLen == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')
	var (
		// i is the start of the pending run of bytes not yet copied to buf;
		// j is the scan position.
		i, j int
	)
	if valLen >= 8 {
		chunks := stringToUint64Slice(s)
		for idx, n := range chunks {
			// combine masks before checking for the MSB of each byte. We include
			// `n` in the mask to check whether any of the *input* byte MSBs were
			// set (i.e. the byte was outside the ASCII range).
			mask := n | (n - (lsb * 0x20)) |
				((n ^ (lsb * '"')) - lsb) |
				((n ^ (lsb * '\\')) - lsb)
			if (mask & msb) != 0 {
				// idx*8 turns the chunk index into a byte offset so the slow
				// path resumes at the flagged byte instead of rescanning from
				// the first chunk.
				// NOTE(review): lane-to-byte mapping assumes a little-endian
				// host — confirm for big-endian targets.
				j = idx*8 + bits.TrailingZeros64(mask&msb)/8
				goto ESCAPE_END
			}
		}
		// Examine the trailing bytes that did not fill a whole 8-byte chunk.
		for k := len(chunks) * 8; k < valLen; k++ {
			if needEscape[s[k]] {
				j = k
				goto ESCAPE_END
			}
		}
		// no escape characters found: copy the string through verbatim.
		return append(append(buf, s...), '"')
	}
ESCAPE_END:
	for j < valLen {
		c := s[j]

		if !needEscape[c] {
			// fast path: most of the time, printable ascii characters are used
			j++
			continue
		}

		switch c {
		case '\\', '"':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', c)
			i = j + 1
			j = j + 1
			continue

		case '\n':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'n')
			i = j + 1
			j = j + 1
			continue

		case '\r':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'r')
			i = j + 1
			j = j + 1
			continue

		case '\t':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 't')
			i = j + 1
			j = j + 1
			continue

		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		}
		j++
	}

	return append(append(buf, s[i:]...), '"')
}