package syntax
import (
"bytes"
"fmt"
"math"
"os"
)
func Write (tree *RegexTree ) (*Code , error ) {
w := writer {
intStack : make ([]int , 0 , 32 ),
emitted : make ([]int , 2 ),
stringhash : make (map [string ]int ),
sethash : make (map [string ]int ),
}
code , err := w .codeFromTree (tree )
if tree .options &Debug > 0 && code != nil {
os .Stdout .WriteString (code .Dump ())
os .Stdout .WriteString ("\n" )
}
return code , err
}
type writer struct {
emitted []int
intStack []int
curpos int
stringhash map [string ]int
stringtable [][]rune
sethash map [string ]int
settable []*CharSet
counting bool
count int
trackcount int
caps map [int ]int
}
const (
beforeChild nodeType = 64
afterChild = 128
MaxPrefixSize = 50
)
func (w *writer ) codeFromTree (tree *RegexTree ) (*Code , error ) {
var (
curNode *regexNode
curChild int
capsize int
)
if tree .capnumlist == nil || tree .captop == len (tree .capnumlist ) {
capsize = tree .captop
w .caps = nil
} else {
capsize = len (tree .capnumlist )
w .caps = tree .caps
for i := 0 ; i < len (tree .capnumlist ); i ++ {
w .caps [tree .capnumlist [i ]] = i
}
}
w .counting = true
for {
if !w .counting {
w .emitted = make ([]int , w .count )
}
curNode = tree .root
curChild = 0
w .emit1 (Lazybranch , 0 )
for {
if len (curNode .children ) == 0 {
w .emitFragment (curNode .t , curNode , 0 )
} else if curChild < len (curNode .children ) {
w .emitFragment (curNode .t |beforeChild , curNode , curChild )
curNode = curNode .children [curChild ]
w .pushInt (curChild )
curChild = 0
continue
}
if w .emptyStack () {
break
}
curChild = w .popInt ()
curNode = curNode .next
w .emitFragment (curNode .t |afterChild , curNode , curChild )
curChild ++
}
w .patchJump (0 , w .curPos ())
w .emit (Stop )
if !w .counting {
break
}
w .counting = false
}
fcPrefix := getFirstCharsPrefix (tree )
prefix := getPrefix (tree )
rtl := (tree .options & RightToLeft ) != 0
var bmPrefix *BmPrefix
if prefix != nil && len (prefix .PrefixStr ) > 0 && MaxPrefixSize > 0 {
if len (prefix .PrefixStr ) > MaxPrefixSize {
prefix .PrefixStr = prefix .PrefixStr [:MaxPrefixSize ]
}
bmPrefix = newBmPrefix (prefix .PrefixStr , prefix .CaseInsensitive , rtl )
} else {
bmPrefix = nil
}
return &Code {
Codes : w .emitted ,
Strings : w .stringtable ,
Sets : w .settable ,
TrackCount : w .trackcount ,
Caps : w .caps ,
Capsize : capsize ,
FcPrefix : fcPrefix ,
BmPrefix : bmPrefix ,
Anchors : getAnchors (tree ),
RightToLeft : rtl ,
}, nil
}
func (w *writer ) emitFragment (nodetype nodeType , node *regexNode , curIndex int ) error {
bits := InstOp (0 )
if nodetype <= ntRef {
if (node .options & RightToLeft ) != 0 {
bits |= Rtl
}
if (node .options & IgnoreCase ) != 0 {
bits |= Ci
}
}
ntBits := nodeType (bits )
switch nodetype {
case ntConcatenate | beforeChild , ntConcatenate | afterChild , ntEmpty :
break
case ntAlternate | beforeChild :
if curIndex < len (node .children )-1 {
w .pushInt (w .curPos ())
w .emit1 (Lazybranch , 0 )
}
case ntAlternate | afterChild :
if curIndex < len (node .children )-1 {
lbPos := w .popInt ()
w .pushInt (w .curPos ())
w .emit1 (Goto , 0 )
w .patchJump (lbPos , w .curPos ())
} else {
for i := 0 ; i < curIndex ; i ++ {
w .patchJump (w .popInt (), w .curPos ())
}
}
break
case ntTestref | beforeChild :
if curIndex == 0 {
w .emit (Setjump )
w .pushInt (w .curPos ())
w .emit1 (Lazybranch , 0 )
w .emit1 (Testref , w .mapCapnum (node .m ))
w .emit (Forejump )
}
case ntTestref | afterChild :
if curIndex == 0 {
branchpos := w .popInt ()
w .pushInt (w .curPos ())
w .emit1 (Goto , 0 )
w .patchJump (branchpos , w .curPos ())
w .emit (Forejump )
if len (node .children ) <= 1 {
w .patchJump (w .popInt (), w .curPos ())
}
} else if curIndex == 1 {
w .patchJump (w .popInt (), w .curPos ())
}
case ntTestgroup | beforeChild :
if curIndex == 0 {
w .emit (Setjump )
w .emit (Setmark )
w .pushInt (w .curPos ())
w .emit1 (Lazybranch , 0 )
}
case ntTestgroup | afterChild :
if curIndex == 0 {
w .emit (Getmark )
w .emit (Forejump )
} else if curIndex == 1 {
Branchpos := w .popInt ()
w .pushInt (w .curPos ())
w .emit1 (Goto , 0 )
w .patchJump (Branchpos , w .curPos ())
w .emit (Getmark )
w .emit (Forejump )
if len (node .children ) <= 2 {
w .patchJump (w .popInt (), w .curPos ())
}
} else if curIndex == 2 {
w .patchJump (w .popInt (), w .curPos ())
}
case ntLoop | beforeChild , ntLazyloop | beforeChild :
if node .n < math .MaxInt32 || node .m > 1 {
if node .m == 0 {
w .emit1 (Nullcount , 0 )
} else {
w .emit1 (Setcount , 1 -node .m )
}
} else if node .m == 0 {
w .emit (Nullmark )
} else {
w .emit (Setmark )
}
if node .m == 0 {
w .pushInt (w .curPos ())
w .emit1 (Goto , 0 )
}
w .pushInt (w .curPos ())
case ntLoop | afterChild , ntLazyloop | afterChild :
startJumpPos := w .curPos ()
lazy := (nodetype - (ntLoop | afterChild ))
if node .n < math .MaxInt32 || node .m > 1 {
if node .n == math .MaxInt32 {
w .emit2 (InstOp (Branchcount +lazy ), w .popInt (), math .MaxInt32 )
} else {
w .emit2 (InstOp (Branchcount +lazy ), w .popInt (), node .n -node .m )
}
} else {
w .emit1 (InstOp (Branchmark +lazy ), w .popInt ())
}
if node .m == 0 {
w .patchJump (w .popInt (), startJumpPos )
}
case ntGroup | beforeChild , ntGroup | afterChild :
case ntCapture | beforeChild :
w .emit (Setmark )
case ntCapture | afterChild :
w .emit2 (Capturemark , w .mapCapnum (node .m ), w .mapCapnum (node .n ))
case ntRequire | beforeChild :
w .emit (Setjump )
w .emit (Setmark )
case ntRequire | afterChild :
w .emit (Getmark )
w .emit (Forejump )
case ntPrevent | beforeChild :
w .emit (Setjump )
w .pushInt (w .curPos ())
w .emit1 (Lazybranch , 0 )
case ntPrevent | afterChild :
w .emit (Backjump )
w .patchJump (w .popInt (), w .curPos ())
w .emit (Forejump )
case ntGreedy | beforeChild :
w .emit (Setjump )
case ntGreedy | afterChild :
w .emit (Forejump )
case ntOne , ntNotone :
w .emit1 (InstOp (node .t |ntBits ), int (node .ch ))
case ntNotoneloop , ntNotonelazy , ntOneloop , ntOnelazy :
if node .m > 0 {
if node .t == ntOneloop || node .t == ntOnelazy {
w .emit2 (Onerep |bits , int (node .ch ), node .m )
} else {
w .emit2 (Notonerep |bits , int (node .ch ), node .m )
}
}
if node .n > node .m {
if node .n == math .MaxInt32 {
w .emit2 (InstOp (node .t |ntBits ), int (node .ch ), math .MaxInt32 )
} else {
w .emit2 (InstOp (node .t |ntBits ), int (node .ch ), node .n -node .m )
}
}
case ntSetloop , ntSetlazy :
if node .m > 0 {
w .emit2 (Setrep |bits , w .setCode (node .set ), node .m )
}
if node .n > node .m {
if node .n == math .MaxInt32 {
w .emit2 (InstOp (node .t |ntBits ), w .setCode (node .set ), math .MaxInt32 )
} else {
w .emit2 (InstOp (node .t |ntBits ), w .setCode (node .set ), node .n -node .m )
}
}
case ntMulti :
w .emit1 (InstOp (node .t |ntBits ), w .stringCode (node .str ))
case ntSet :
w .emit1 (InstOp (node .t |ntBits ), w .setCode (node .set ))
case ntRef :
w .emit1 (InstOp (node .t |ntBits ), w .mapCapnum (node .m ))
case ntNothing , ntBol , ntEol , ntBoundary , ntNonboundary , ntECMABoundary , ntNonECMABoundary , ntBeginning , ntStart , ntEndZ , ntEnd :
w .emit (InstOp (node .t ))
default :
return fmt .Errorf ("unexpected opcode in regular expression generation: %v" , nodetype )
}
return nil
}
func (w *writer ) pushInt (i int ) {
w .intStack = append (w .intStack , i )
}
func (w *writer ) emptyStack () bool {
return len (w .intStack ) == 0
}
func (w *writer ) popInt () int {
idx := len (w .intStack ) - 1
i := w .intStack [idx ]
w .intStack = w .intStack [:idx ]
return i
}
func (w *writer ) curPos () int {
return w .curpos
}
func (w *writer ) patchJump (offset , jumpDest int ) {
w .emitted [offset +1 ] = jumpDest
}
func (w *writer ) setCode (set *CharSet ) int {
if w .counting {
return 0
}
buf := &bytes .Buffer {}
set .mapHashFill (buf )
hash := buf .String ()
i , ok := w .sethash [hash ]
if !ok {
i = len (w .sethash )
w .sethash [hash ] = i
w .settable = append (w .settable , set )
}
return i
}
func (w *writer ) stringCode (str []rune ) int {
if w .counting {
return 0
}
hash := string (str )
i , ok := w .stringhash [hash ]
if !ok {
i = len (w .stringhash )
w .stringhash [hash ] = i
w .stringtable = append (w .stringtable , str )
}
return i
}
func (w *writer ) mapCapnum (capnum int ) int {
if capnum == -1 {
return -1
}
if w .caps != nil {
return w .caps [capnum ]
}
return capnum
}
func (w *writer ) emit (op InstOp ) {
if w .counting {
w .count ++
if opcodeBacktracks (op ) {
w .trackcount ++
}
return
}
w .emitted [w .curpos ] = int (op )
w .curpos ++
}
func (w *writer ) emit1 (op InstOp , opd1 int ) {
if w .counting {
w .count += 2
if opcodeBacktracks (op ) {
w .trackcount ++
}
return
}
w .emitted [w .curpos ] = int (op )
w .curpos ++
w .emitted [w .curpos ] = opd1
w .curpos ++
}
func (w *writer ) emit2 (op InstOp , opd1 , opd2 int ) {
if w .counting {
w .count += 3
if opcodeBacktracks (op ) {
w .trackcount ++
}
return
}
w .emitted [w .curpos ] = int (op )
w .curpos ++
w .emitted [w .curpos ] = opd1
w .curpos ++
w .emitted [w .curpos ] = opd2
w .curpos ++
}
The pages are generated with Golds v0.8.2 . (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu .
PR and bug reports are welcome and can be submitted to the issue list .
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds .