package goja
import (
"fmt"
"github.com/dlclark/regexp2"
"github.com/dop251/goja/unistring"
"io"
"regexp"
"sort"
"strings"
"unicode/utf16"
)
// regexp2MatchCache keeps the decoded form of a match target so that a later
// search against the same string can reuse it instead of decoding again.
type regexp2MatchCache struct {
// target is the string the cached data was produced from.
target String
// runes is the rune slice that was handed to the regexp2 engine for target.
runes []rune
// posMap is nil when the entry was stored by the UTF-16 search path and
// non-nil when it was stored by the full-Unicode search path.
posMap []int
}
// regexp2Wrapper pairs a compiled regexp2 expression with an optional cache
// of the most recently searched target.
type regexp2Wrapper struct {
// rx is the compiled regexp2 expression.
rx *regexp2 .Regexp
// cache is nil when no reusable data from a previous search is held.
cache *regexp2MatchCache
}
// regexpWrapper is the standard library regexp.Regexp under a local name so
// that search methods can be attached to it.
type regexpWrapper regexp .Regexp
// positionMapItem associates one source position with the destination
// position it translates to.
type positionMapItem struct {
	src int
	dst int
}

// positionMap is a list of position pairs. get performs a binary search on
// the src field, so the items must be sorted by src in ascending order.
type positionMap []positionMapItem

// get translates src into its destination position.
//
// Values that are zero or negative are returned unchanged. Any other value
// must be present in the map; if it is not, get panics with
// "index not found".
func (m positionMap) get(src int) int {
	if src <= 0 {
		return src
	}
	at := sort.Search(len(m), func(i int) bool {
		return src <= m[i].src
	})
	if at < len(m) && m[at].src == src {
		return m[at].dst
	}
	panic("index not found")
}
// arrayRuneReader serves the runes of a slice one at a time through a
// ReadRune method.
type arrayRuneReader struct {
	runes []rune
	pos   int
}

// ReadRune returns the next rune of the slice and advances the reader.
// Every rune is reported with a size of 1. Once the slice is exhausted it
// returns io.EOF with a zero rune and size.
func (rd *arrayRuneReader) ReadRune() (r rune, size int, err error) {
	if rd.pos >= len(rd.runes) {
		return 0, 0, io.EOF
	}
	r = rd.runes[rd.pos]
	rd.pos++
	return r, 1, nil
}
// regexpPattern is a regular expression source with its flags and the
// compiled matchers for it. Either wrapper may be nil: the regexp2 one is
// created on demand by createRegexp2, and the search methods fall back to it
// whenever regexpWrapper is nil.
type regexpPattern struct {
	// src is the pattern text passed to the regexp2 compiler.
	src string

	// Flags of the expression.
	global     bool
	ignoreCase bool
	multiline  bool
	dotAll     bool
	sticky     bool
	unicode    bool

	// regexpWrapper is the standard library matcher, if there is one.
	regexpWrapper *regexpWrapper
	// regexp2Wrapper is the regexp2 matcher, if it has been created.
	regexp2Wrapper *regexp2Wrapper
}
func compileRegexp2(src string , multiline , dotAll , ignoreCase , unicode bool ) (*regexp2Wrapper , error ) {
var opts regexp2 .RegexOptions = regexp2 .ECMAScript
if multiline {
opts |= regexp2 .Multiline
}
if dotAll {
opts |= regexp2 .Singleline
}
if ignoreCase {
opts |= regexp2 .IgnoreCase
}
if unicode {
opts |= regexp2 .Unicode
}
regexp2Pattern , err1 := regexp2 .Compile (src , opts )
if err1 != nil {
return nil , fmt .Errorf ("Invalid regular expression (regexp2): %s (%v)" , src , err1 )
}
return ®exp2Wrapper {rx : regexp2Pattern }, nil
}
// createRegexp2 makes sure p.regexp2Wrapper is set, compiling it from the
// pattern source and flags on first use. It panics with the compilation
// error if the pattern cannot be compiled by regexp2.
func (p *regexpPattern) createRegexp2() {
	if p.regexp2Wrapper == nil {
		wrapper, err := compileRegexp2(p.src, p.multiline, p.dotAll, p.ignoreCase, p.unicode)
		if err != nil {
			panic(err)
		}
		p.regexp2Wrapper = wrapper
	}
}
// buildUTF8PosMap re-encodes s as a UTF-8 string and records, after every
// rune, the running UTF-8 byte offset (src) next to the running sum of the
// sizes reported by the reader of s (dst).
//
// If the reader reports any error other than io.EOF, the conversion is
// abandoned and (nil, "") is returned.
func buildUTF8PosMap(s unicodeString) (positionMap, string) {
	mapping := make(positionMap, 0, s.Length())
	reader := s.Reader()

	var (
		utf8Text strings.Builder
		srcOff   int // offset within s, in the units reported by reader
		utf8Off  int // offset within the UTF-8 output, in bytes
	)
	for {
		r, size, err := reader.ReadRune()
		switch {
		case err == io.EOF:
			return mapping, utf8Text.String()
		case err != nil:
			return nil, ""
		}
		written, _ := utf8Text.WriteRune(r)
		srcOff += size
		utf8Off += written
		mapping = append(mapping, positionMapItem{src: utf8Off, dst: srcOff})
	}
}
// findSubmatchIndex searches s from position start and returns the submatch
// index pairs of the first match.
//
// The standard library matcher is used only when it exists and the search
// starts at position 0. In every other case the search goes through the
// regexp2 matcher, which is created first if a standard library matcher
// exists.
func (p *regexpPattern) findSubmatchIndex(s String, start int) []int {
	if p.regexpWrapper != nil {
		if start == 0 {
			return p.regexpWrapper.findSubmatchIndex(s, p.unicode)
		}
		p.createRegexp2()
	}
	return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode, p.global || p.sticky)
}
// findAllSubmatchIndex collects the submatch index pairs of the matches in s
// beginning at position start, passing limit and sticky on to the matcher.
//
// Without a standard library matcher the regexp2 matcher is used directly.
// Otherwise, for a search that starts at 0, the standard library matcher
// handles three cases: ASCII strings, a single-match request (limit == 1),
// and unicode patterns whose target could be converted by buildUTF8PosMap.
// In the last case the resulting indexes are translated back through the
// position map. Everything else falls through to the regexp2 matcher.
func (p *regexpPattern) findAllSubmatchIndex(s String, start int, limit int, sticky bool) [][]int {
	if p.regexpWrapper == nil {
		return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
	}

	if start == 0 {
		ascii, uni := devirtualizeString(s)
		switch {
		case uni == nil:
			return p.regexpWrapper.findAllSubmatchIndex(string(ascii), limit, sticky)
		case limit == 1:
			if single := p.regexpWrapper.findSubmatchIndexUnicode(uni, p.unicode); single != nil {
				return [][]int{single}
			}
			return nil
		case p.unicode:
			if posMap, utf8Text := buildUTF8PosMap(uni); posMap != nil {
				matches := p.regexpWrapper.findAllSubmatchIndex(utf8Text, limit, sticky)
				for _, m := range matches {
					for i := range m {
						m[i] = posMap.get(m[i])
					}
				}
				return matches
			}
		}
	}

	p.createRegexp2()
	return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
}
func (p *regexpPattern ) clone () *regexpPattern {
ret := ®expPattern {
src : p .src ,
global : p .global ,
ignoreCase : p .ignoreCase ,
multiline : p .multiline ,
dotAll : p .dotAll ,
sticky : p .sticky ,
unicode : p .unicode ,
}
if p .regexpWrapper != nil {
ret .regexpWrapper = p .regexpWrapper .clone ()
}
if p .regexp2Wrapper != nil {
ret .regexp2Wrapper = p .regexp2Wrapper .clone ()
}
return ret
}
// regexpObject is the object representation of a regular expression: a
// baseObject extended with the pattern and its source text.
type regexpObject struct {
baseObject
// pattern is the pattern that execRegexp searches with.
pattern *regexpPattern
// source is carried over unchanged by clone.
source String
// standard is set to true by init and cleared by the property-mutating
// overrides of this type (setProto, defineOwnProperty*, deleteStr, setOwn*).
// NOTE(review): presumably it signals that the object still has default
// regexp behavior — confirm against the code that reads it.
standard bool
}
// findSubmatchIndex returns the submatch index pairs of the first match in s
// at or after start. fullUnicode selects the Unicode search over the UTF-16
// one; doCache is passed on to whichever is chosen.
func (r *regexp2Wrapper) findSubmatchIndex(s String, start int, fullUnicode, doCache bool) (result []int) {
	if !fullUnicode {
		return r.findSubmatchIndexUTF16(s, start, doCache)
	}
	return r.findSubmatchIndexUnicode(s, start, doCache)
}
// findUTF16Cached runs the regexp2 expression over the UTF-16 runes of s,
// beginning at start, and returns the match together with the runes used.
//
// The runes come from the cache when it holds a UTF-16 entry (posMap == nil)
// for the same string; otherwise they are obtained from s. After the search
// the cache is kept or refreshed only if doCache is set and a match was
// found without error. In every other case the cache is dropped.
func (r *regexp2Wrapper) findUTF16Cached(s String, start int, doCache bool) (match *regexp2.Match, runes []rune, err error) {
	rx := r.rx
	reused := r.cache != nil && r.cache.posMap == nil && r.cache.target.SameAs(s)
	if reused {
		runes = r.cache.runes
	} else {
		runes = s.utf16Runes()
	}

	match, err = rx.FindRunesMatchStartingAt(runes, start)
	if !doCache || match == nil || err != nil {
		r.cache = nil
		return match, runes, err
	}

	// A reused entry is already up to date; only store freshly decoded runes.
	if !reused {
		if r.cache == nil {
			r.cache = new(regexp2MatchCache)
		}
		*r.cache = regexp2MatchCache{target: s, runes: runes}
	}
	return match, runes, err
}
// findSubmatchIndexUTF16 returns the submatch index pairs of the first match
// in s at or after start, or nil when the search fails or finds nothing.
//
// Each group contributes a (start, end) pair taken from its Index and
// Length; a group without captures contributes (-1, 0).
func (r *regexp2Wrapper) findSubmatchIndexUTF16(s String, start int, doCache bool) (result []int) {
	match, _, err := r.findUTF16Cached(s, start, doCache)
	if err != nil || match == nil {
		return nil
	}

	groups := match.Groups()
	result = make([]int, 0, 2*len(groups))
	for _, g := range groups {
		if len(g.Captures) == 0 {
			result = append(result, -1, 0)
			continue
		}
		result = append(result, g.Index, g.Index+g.Length)
	}
	return result
}
// findUnicodeCached runs the regexp2 expression over the decoded runes of s
// and returns the match together with the position map that translates rune
// indexes back to positions in s.
//
// The runes and position map come from the cache when it holds a Unicode
// entry (posMap != nil) for the same string; otherwise they are built with
// buildPosMap. If start falls inside a surrogate pair, the rune at the
// mapped start is replaced for the duration of the search by the second
// value returned by utf16.EncodeRune for it.
//
// The cache is kept or refreshed only if doCache is set and a match was
// found without error, and the replaced rune is restored only in that case.
// In every other case the cache is dropped.
func (r *regexp2Wrapper) findUnicodeCached(s String, start int, doCache bool) (match *regexp2.Match, posMap []int, err error) {
	var (
		runes       []rune
		mappedStart int
		splitPair   bool
	)
	rx := r.rx
	reused := r.cache != nil && r.cache.posMap != nil && r.cache.target.SameAs(s)
	if reused {
		runes, posMap = r.cache.runes, r.cache.posMap
		mappedStart, splitPair = posMapReverseLookup(posMap, start)
	} else {
		posMap, runes, mappedStart, splitPair = buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader()}, s.Length(), start)
	}

	var original rune
	if splitPair {
		original = runes[mappedStart]
		_, runes[mappedStart] = utf16.EncodeRune(original)
	}

	match, err = rx.FindRunesMatchStartingAt(runes, mappedStart)
	if !doCache || match == nil || err != nil {
		r.cache = nil
		return match, posMap, err
	}

	// The runes are about to be (or already are) cached: undo the substitution.
	if splitPair {
		runes[mappedStart] = original
	}
	if !reused {
		if r.cache == nil {
			r.cache = new(regexp2MatchCache)
		}
		*r.cache = regexp2MatchCache{target: s, runes: runes, posMap: posMap}
	}
	return match, posMap, err
}
// findSubmatchIndexUnicode returns the submatch index pairs of the first
// match in s at or after start, or nil when the search fails or finds
// nothing.
//
// Group bounds are rune indexes translated through the position map returned
// by findUnicodeCached; a group without captures contributes (-1, 0).
func (r *regexp2Wrapper) findSubmatchIndexUnicode(s String, start int, doCache bool) (result []int) {
	match, posMap, err := r.findUnicodeCached(s, start, doCache)
	if err != nil || match == nil {
		return nil
	}

	groups := match.Groups()
	result = make([]int, 0, 2*len(groups))
	for _, g := range groups {
		if len(g.Captures) == 0 {
			result = append(result, -1, 0)
			continue
		}
		result = append(result, posMap[g.Index], posMap[g.Index+g.Length])
	}
	return result
}
// findAllSubmatchIndexUTF16 collects the submatch index pairs of successive
// matches in the UTF-16 runes of s, beginning at start.
//
// A negative limit is replaced by len(runes)+1; collection stops once limit
// matches have been gathered. With sticky set, a match that does not begin
// where the previous one ended (or at start, for the first) ends the
// collection. It returns nil if nothing matches or the engine reports an
// error.
func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s String, start, limit int, sticky bool) [][]int {
	rx := r.rx
	match, runes, err := r.findUTF16Cached(s, start, false)
	if err != nil || match == nil {
		return nil
	}
	if limit < 0 {
		limit = len(runes) + 1
	}

	all := make([][]int, 0, limit)
	for match != nil {
		groups := match.Groups()
		bounds := make([]int, 0, 2*len(groups))
		for _, g := range groups {
			if len(g.Captures) == 0 {
				bounds = append(bounds, -1, 0)
				continue
			}
			bounds = append(bounds, g.Index, g.Index+g.Length)
		}

		if sticky && len(bounds) > 1 {
			if bounds[0] != start {
				break
			}
			start = bounds[1]
		}

		all = append(all, bounds)
		limit--
		if limit < 1 {
			break
		}
		if match, err = rx.FindNextMatch(match); err != nil {
			return nil
		}
	}
	return all
}
// buildPosMap drains rd and returns the runes it produced together with a
// position map: posMap[i] is the sum of the sizes of the runes before index
// i, and a final entry holds the total size. l is only a capacity hint.
//
// mappedStart is the rune index whose position equals start. If start falls
// strictly inside a rune (its position was stepped over), mappedStart is the
// index of that rune and splitPair is true. If start is never reached,
// mappedStart is 0 and splitPair is false.
//
// Reading stops at the first error from rd, whatever that error is.
func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, mappedStart int, splitPair bool) {
	posMap = make([]int, 0, l+1)
	runes = make([]rune, 0, l)

	offset := 0
	located := false
	for {
		// The check runs before every read, including the one that hits the
		// end, so a start equal to the total size is still located.
		if !located {
			switch {
			case offset == start:
				mappedStart, located = len(runes), true
			case offset > start:
				mappedStart, splitPair, located = len(runes)-1, true, true
			}
		}

		r, size, err := rd.ReadRune()
		if err != nil {
			break
		}
		runes = append(runes, r)
		posMap = append(posMap, offset)
		offset += size
	}
	posMap = append(posMap, offset)
	return posMap, runes, mappedStart, splitPair
}
// posMapReverseLookup finds the index in the ascending posMap whose value is
// pos. When pos is present, or lies beyond the last entry, it returns the
// search index and false. When pos falls between two entries (or before the
// first), it returns the index just before the search index and true.
func posMapReverseLookup(posMap []int, pos int) (int, bool) {
	idx := sort.SearchInts(posMap, pos)
	if idx >= len(posMap) || posMap[idx] == pos {
		return idx, false
	}
	return idx - 1, true
}
// findAllSubmatchIndexUnicode collects the submatch index pairs of
// successive matches in s, beginning at start, with group bounds translated
// through the position map returned by findUnicodeCached.
//
// A negative limit is replaced by len(s)+1; collection stops once limit
// matches have been gathered. With sticky set, a match that does not begin
// where the previous one ended (or at start, for the first) ends the
// collection. It returns nil if the engine reports an error and an empty,
// non-nil slice if nothing matches.
func (r *regexp2Wrapper) findAllSubmatchIndexUnicode(s unicodeString, start, limit int, sticky bool) [][]int {
	wrapped := r.rx
	if limit < 0 {
		limit = len(s) + 1
	}
	results := make([][]int, 0, limit)
	match, posMap, err := r.findUnicodeCached(s, start, false)
	if err != nil {
		return nil
	}
	for match != nil {
		groups := match.Groups()
		result := make([]int, 0, len(groups)<<1)
		for _, group := range groups {
			if len(group.Captures) > 0 {
				// Distinct names keep the start parameter, which the sticky
				// check below relies on, from being shadowed.
				startPos := posMap[group.Index]
				endPos := posMap[group.Index+group.Length]
				result = append(result, startPos, endPos)
			} else {
				result = append(result, -1, 0)
			}
		}

		if sticky && len(result) > 1 {
			if result[0] != start {
				break
			}
			start = result[1]
		}

		results = append(results, result)
		// Enforce the limit the same way findAllSubmatchIndexUTF16 does.
		// Previously limit was only used as a capacity hint, so a caller
		// asking for a single match received every match.
		limit--
		if limit <= 0 {
			break
		}
		match, err = wrapped.FindNextMatch(match)
		if err != nil {
			return nil
		}
	}
	return results
}
// findAllSubmatchIndex collects all matches in s with the regexp2 engine.
// ASCII strings always use the UTF-16 search. Unicode strings use the
// full-Unicode search when fullUnicode is set and the UTF-16 one otherwise.
func (r *regexp2Wrapper) findAllSubmatchIndex(s String, start, limit int, sticky, fullUnicode bool) [][]int {
	ascii, uni := devirtualizeString(s)
	switch {
	case uni == nil:
		return r.findAllSubmatchIndexUTF16(ascii, start, limit, sticky)
	case fullUnicode:
		return r.findAllSubmatchIndexUnicode(uni, start, limit, sticky)
	default:
		return r.findAllSubmatchIndexUTF16(uni, start, limit, sticky)
	}
}
func (r *regexp2Wrapper ) clone () *regexp2Wrapper {
return ®exp2Wrapper {
rx : r .rx ,
}
}
// findAllSubmatchIndex returns up to limit matches of the expression in s as
// reported by regexp.FindAllStringSubmatchIndex. With sticky set the result
// is cut off before the first match that does not begin exactly where the
// previous one ended (position 0 for the first match).
func (r *regexpWrapper) findAllSubmatchIndex(s string, limit int, sticky bool) (results [][]int) {
	results = (*regexp.Regexp)(r).FindAllStringSubmatchIndex(s, limit)
	if !sticky {
		return results
	}

	expected := 0
	for i, m := range results {
		if len(m) <= 1 {
			continue
		}
		if m[0] != expected {
			return results[:i]
		}
		expected = m[1]
	}
	return results
}
// findSubmatchIndex returns the submatch index pairs of the first match in
// s, dispatching on whether s is an ASCII or a Unicode string.
func (r *regexpWrapper) findSubmatchIndex(s String, fullUnicode bool) []int {
	ascii, uni := devirtualizeString(s)
	if uni == nil {
		return r.findSubmatchIndexASCII(string(ascii))
	}
	return r.findSubmatchIndexUnicode(uni, fullUnicode)
}
// findSubmatchIndexASCII returns the result of
// regexp.FindStringSubmatchIndex for s.
func (r *regexpWrapper) findSubmatchIndexASCII(s string) []int {
	return (*regexp.Regexp)(r).FindStringSubmatchIndex(s)
}
// findSubmatchIndexUnicode returns the submatch index pairs of the first
// match in the Unicode string s.
//
// Without fullUnicode the expression is run directly over the UTF-16 rune
// reader of s. With fullUnicode the string is first decoded with
// buildPosMap, matched rune by rune, and every non-negative index of the
// result is translated back through the position map.
func (r *regexpWrapper) findSubmatchIndexUnicode(s unicodeString, fullUnicode bool) (result []int) {
	rx := (*regexp.Regexp)(r)
	if !fullUnicode {
		return rx.FindReaderSubmatchIndex(s.utf16RuneReader())
	}

	posMap, runes, _, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader()}, s.Length(), 0)
	result = rx.FindReaderSubmatchIndex(&arrayRuneReader{runes: runes})
	for i := range result {
		if result[i] >= 0 {
			result[i] = posMap[result[i]]
		}
	}
	return result
}
// clone returns r itself; no copy is made.
func (r *regexpWrapper ) clone () *regexpWrapper {
return r
}
// execResultToArray turns the submatch index pairs in result into an array
// value whose elements are the matched substrings of target, and sets the
// "input" and "index" properties on it.
//
// Element 0 is the whole match. A capture group becomes _undefined when its
// start index is negative or when its end index is below the start index of
// the most recent group that produced a substring.
func (r *regexpObject) execResultToArray(target String, result []int) Value {
	groupCount := len(result) / 2
	values := make([]Value, groupCount)
	matchStart := result[0]
	values[0] = target.Substring(matchStart, result[1])

	floor := 0
	for i := 1; i < groupCount; i++ {
		from, to := result[2*i], result[2*i+1]
		if from < 0 || to < floor {
			values[i] = _undefined
			continue
		}
		values[i] = target.Substring(from, to)
		floor = from
	}

	arr := r.val.runtime.newArrayValues(values)
	arr.self.setOwnStr("input", target, false)
	arr.self.setOwnStr("index", intToValue(int64(matchStart)), false)
	return arr
}
// getLastIndex reads the "lastIndex" property and converts it with toLength.
// The property is always read, but the value is only returned for global or
// sticky patterns; for all others the result is 0.
func (r *regexpObject) getLastIndex() int64 {
	lastIndex := toLength(r.getStr("lastIndex", nil))
	if r.pattern.global || r.pattern.sticky {
		return lastIndex
	}
	return 0
}
// updateLastIndex stores the new "lastIndex" after a search and reports
// whether the search counts as a match.
//
// A search fails when firstResult is nil, or, for sticky patterns, when the
// first match does not begin at index. On failure "lastIndex" is reset to 0
// for sticky and global patterns. On success it is set to the end of
// lastResult for sticky and global patterns. Patterns with neither flag
// never have "lastIndex" written.
func (r *regexpObject) updateLastIndex(index int64, firstResult, lastResult []int) bool {
	tracksIndex := r.pattern.sticky || r.pattern.global

	matched := firstResult != nil
	if matched && r.pattern.sticky {
		matched = int64(firstResult[0]) == index
	}

	if !matched {
		if tracksIndex {
			r.setOwnStr("lastIndex", intToValue(0), true)
		}
		return false
	}
	if tracksIndex {
		r.setOwnStr("lastIndex", intToValue(int64(lastResult[1])), true)
	}
	return true
}
// execRegexp searches target from the current last index and updates
// "lastIndex" accordingly. The search is skipped, leaving result nil, when
// the last index lies outside [0, target.Length()].
func (r *regexpObject) execRegexp(target String) (match bool, result []int) {
	index := r.getLastIndex()
	if 0 <= index && index <= int64(target.Length()) {
		result = r.pattern.findSubmatchIndex(target, int(index))
	}
	match = r.updateLastIndex(index, result, result)
	return match, result
}
// exec runs the pattern against target and returns the match array built by
// execResultToArray, or _null when there is no match.
func (r *regexpObject) exec(target String) Value {
	if matched, bounds := r.execRegexp(target); matched {
		return r.execResultToArray(target, bounds)
	}
	return _null
}
// test reports whether the pattern matches target, as decided by execRegexp.
func (r *regexpObject) test(target String) bool {
	matched, _ := r.execRegexp(target)
	return matched
}
// clone creates a new regexp object with the same prototype as r. The source
// and the pattern pointer are copied as they are, so both objects share one
// regexpPattern.
func (r *regexpObject) clone() *regexpObject {
	dup := r.val.runtime.newRegexpObject(r.prototype)
	dup.source, dup.pattern = r.source, r.pattern
	return dup
}
// init initializes the embedded baseObject, sets the standard flag and
// defines the "lastIndex" property with an initial value of 0.
func (r *regexpObject ) init () {
r .baseObject .init ()
r .standard = true
r ._putProp ("lastIndex" , intToValue (0 ), true , false , false )
}
// setProto delegates to baseObject.setProto and, when the prototype was
// changed successfully, clears the standard flag.
func (r *regexpObject) setProto(proto *Object, throw bool) bool {
	if !r.baseObject.setProto(proto, throw) {
		return false
	}
	r.standard = false
	return true
}
// defineOwnPropertyStr delegates to baseObject.defineOwnPropertyStr and
// clears the standard flag whenever the definition succeeds, regardless of
// the property name.
func (r *regexpObject) defineOwnPropertyStr(name unistring.String, desc PropertyDescriptor, throw bool) bool {
	if !r.baseObject.defineOwnPropertyStr(name, desc, throw) {
		return false
	}
	r.standard = false
	return true
}
// defineOwnPropertySym delegates to baseObject.defineOwnPropertySym. If the
// definition succeeds and the symbol is one of SymMatch, SymMatchAll,
// SymSearch, SymSplit or SymReplace, the standard flag is cleared.
func (r *regexpObject) defineOwnPropertySym(name *Symbol, desc PropertyDescriptor, throw bool) bool {
	ok := r.baseObject.defineOwnPropertySym(name, desc, throw)
	if !ok || !r.standard {
		return ok
	}
	if name == SymMatch || name == SymMatchAll || name == SymSearch || name == SymSplit || name == SymReplace {
		r.standard = false
	}
	return ok
}
// deleteStr delegates to baseObject.deleteStr and clears the standard flag
// whenever the deletion succeeds.
func (r *regexpObject) deleteStr(name unistring.String, throw bool) bool {
	if !r.baseObject.deleteStr(name, throw) {
		return false
	}
	r.standard = false
	return true
}
// setOwnStr delegates to baseObject.setOwnStr. A successful assignment to
// the "exec" property clears the standard flag.
func (r *regexpObject) setOwnStr(name unistring.String, value Value, throw bool) bool {
	if !r.baseObject.setOwnStr(name, value, throw) {
		return false
	}
	if r.standard && name == "exec" {
		r.standard = false
	}
	return true
}
// setOwnSym delegates to baseObject.setOwnSym. If the assignment succeeds
// and the symbol is one of SymMatch, SymMatchAll, SymSearch, SymSplit or
// SymReplace, the standard flag is cleared.
func (r *regexpObject) setOwnSym(name *Symbol, value Value, throw bool) bool {
	ok := r.baseObject.setOwnSym(name, value, throw)
	if !ok || !r.standard {
		return ok
	}
	if name == SymMatch || name == SymMatchAll || name == SymSearch || name == SymSplit || name == SymReplace {
		r.standard = false
	}
	return ok
}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.