package syntax
import (
"bytes"
"fmt"
"math"
"strconv"
)
type RegexTree struct {
root *regexNode
caps map [int ]int
capnumlist []int
captop int
Capnames map [string ]int
Caplist []string
options RegexOptions
}
type regexNode struct {
t nodeType
children []*regexNode
str []rune
set *CharSet
ch rune
m int
n int
options RegexOptions
next *regexNode
}
type nodeType int32
const (
ntOnerep nodeType = 0
ntNotonerep = 1
ntSetrep = 2
ntOneloop = 3
ntNotoneloop = 4
ntSetloop = 5
ntOnelazy = 6
ntNotonelazy = 7
ntSetlazy = 8
ntOne = 9
ntNotone = 10
ntSet = 11
ntMulti = 12
ntRef = 13
ntBol = 14
ntEol = 15
ntBoundary = 16
ntNonboundary = 17
ntBeginning = 18
ntStart = 19
ntEndZ = 20
ntEnd = 21
ntNothing = 22
ntEmpty = 23
ntAlternate = 24
ntConcatenate = 25
ntLoop = 26
ntLazyloop = 27
ntCapture = 28
ntGroup = 29
ntRequire = 30
ntPrevent = 31
ntGreedy = 32
ntTestref = 33
ntTestgroup = 34
ntECMABoundary = 41
ntNonECMABoundary = 42
)
func newRegexNode(t nodeType , opt RegexOptions ) *regexNode {
return ®exNode {
t : t ,
options : opt ,
}
}
func newRegexNodeCh(t nodeType , opt RegexOptions , ch rune ) *regexNode {
return ®exNode {
t : t ,
options : opt ,
ch : ch ,
}
}
func newRegexNodeStr(t nodeType , opt RegexOptions , str []rune ) *regexNode {
return ®exNode {
t : t ,
options : opt ,
str : str ,
}
}
func newRegexNodeSet(t nodeType , opt RegexOptions , set *CharSet ) *regexNode {
return ®exNode {
t : t ,
options : opt ,
set : set ,
}
}
func newRegexNodeM(t nodeType , opt RegexOptions , m int ) *regexNode {
return ®exNode {
t : t ,
options : opt ,
m : m ,
}
}
func newRegexNodeMN(t nodeType , opt RegexOptions , m , n int ) *regexNode {
return ®exNode {
t : t ,
options : opt ,
m : m ,
n : n ,
}
}
func (n *regexNode ) writeStrToBuf (buf *bytes .Buffer ) {
for i := 0 ; i < len (n .str ); i ++ {
buf .WriteRune (n .str [i ])
}
}
func (n *regexNode ) addChild (child *regexNode ) {
reduced := child .reduce ()
n .children = append (n .children , reduced )
reduced .next = n
}
func (n *regexNode ) insertChildren (afterIndex int , nodes []*regexNode ) {
newChildren := make ([]*regexNode , 0 , len (n .children )+len (nodes ))
n .children = append (append (append (newChildren , n .children [:afterIndex ]...), nodes ...), n .children [afterIndex :]...)
}
func (n *regexNode ) removeChildren (startIndex , endIndex int ) {
n .children = append (n .children [:startIndex ], n .children [endIndex :]...)
}
func (n *regexNode ) makeRep (t nodeType , min , max int ) {
n .t += (t - ntOne )
n .m = min
n .n = max
}
func (n *regexNode ) reduce () *regexNode {
switch n .t {
case ntAlternate :
return n .reduceAlternation ()
case ntConcatenate :
return n .reduceConcatenation ()
case ntLoop , ntLazyloop :
return n .reduceRep ()
case ntGroup :
return n .reduceGroup ()
case ntSet , ntSetloop :
return n .reduceSet ()
default :
return n
}
}
func (n *regexNode ) reduceAlternation () *regexNode {
if len (n .children ) == 0 {
return newRegexNode (ntNothing , n .options )
}
wasLastSet := false
lastNodeCannotMerge := false
var optionsLast RegexOptions
var i , j int
for i , j = 0 , 0 ; i < len (n .children ); i , j = i +1 , j +1 {
at := n .children [i ]
if j < i {
n .children [j ] = at
}
for {
if at .t == ntAlternate {
for k := 0 ; k < len (at .children ); k ++ {
at .children [k ].next = n
}
n .insertChildren (i +1 , at .children )
j --
} else if at .t == ntSet || at .t == ntOne {
optionsAt := at .options & (RightToLeft | IgnoreCase )
if at .t == ntSet {
if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at .set .IsMergeable () {
wasLastSet = true
lastNodeCannotMerge = !at .set .IsMergeable ()
optionsLast = optionsAt
break
}
} else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge {
wasLastSet = true
lastNodeCannotMerge = false
optionsLast = optionsAt
break
}
j --
prev := n .children [j ]
var prevCharClass *CharSet
if prev .t == ntOne {
prevCharClass = &CharSet {}
prevCharClass .addChar (prev .ch )
} else {
prevCharClass = prev .set
}
if at .t == ntOne {
prevCharClass .addChar (at .ch )
} else {
prevCharClass .addSet (*at .set )
}
prev .t = ntSet
prev .set = prevCharClass
} else if at .t == ntNothing {
j --
} else {
wasLastSet = false
lastNodeCannotMerge = false
}
break
}
}
if j < i {
n .removeChildren (j , i )
}
return n .stripEnation (ntNothing )
}
func (n *regexNode ) reduceConcatenation () *regexNode {
var optionsLast RegexOptions
var optionsAt RegexOptions
var i , j int
if len (n .children ) == 0 {
return newRegexNode (ntEmpty , n .options )
}
wasLastString := false
for i , j = 0 , 0 ; i < len (n .children ); i , j = i +1 , j +1 {
var at , prev *regexNode
at = n .children [i ]
if j < i {
n .children [j ] = at
}
if at .t == ntConcatenate &&
((at .options & RightToLeft ) == (n .options & RightToLeft )) {
for k := 0 ; k < len (at .children ); k ++ {
at .children [k ].next = n
}
n .insertChildren (i +1 , at .children )
j --
} else if at .t == ntMulti || at .t == ntOne {
optionsAt = at .options & (RightToLeft | IgnoreCase )
if !wasLastString || optionsLast != optionsAt {
wasLastString = true
optionsLast = optionsAt
continue
}
j --
prev = n .children [j ]
if prev .t == ntOne {
prev .t = ntMulti
prev .str = []rune {prev .ch }
}
if (optionsAt & RightToLeft ) == 0 {
if at .t == ntOne {
prev .str = append (prev .str , at .ch )
} else {
prev .str = append (prev .str , at .str ...)
}
} else {
if at .t == ntOne {
prev .str = append (prev .str , 0 )
copy (prev .str [1 :], prev .str )
prev .str [0 ] = at .ch
} else {
merge := make ([]rune , len (prev .str )+len (at .str ))
copy (merge , at .str )
copy (merge [len (at .str ):], prev .str )
prev .str = merge
}
}
} else if at .t == ntEmpty {
j --
} else {
wasLastString = false
}
}
if j < i {
n .removeChildren (j , i )
}
return n .stripEnation (ntEmpty )
}
func (n *regexNode ) reduceRep () *regexNode {
u := n
t := n .t
min := n .m
max := n .n
for {
if len (u .children ) == 0 {
break
}
child := u .children [0 ]
if child .t != t {
childType := child .t
if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop ||
childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop ) {
break
}
}
if u .m == 0 && child .m > 1 || child .n < child .m *2 {
break
}
u = child
if u .m > 0 {
if (math .MaxInt32 -1 )/u .m < min {
u .m = math .MaxInt32
} else {
u .m = u .m * min
}
}
if u .n > 0 {
if (math .MaxInt32 -1 )/u .n < max {
u .n = math .MaxInt32
} else {
u .n = u .n * max
}
}
}
if math .MaxInt32 == min {
return newRegexNode (ntNothing , n .options )
}
return u
}
func (n *regexNode ) stripEnation (emptyType nodeType ) *regexNode {
switch len (n .children ) {
case 0 :
return newRegexNode (emptyType , n .options )
case 1 :
return n .children [0 ]
default :
return n
}
}
func (n *regexNode ) reduceGroup () *regexNode {
u := n
for u .t == ntGroup {
u = u .children [0 ]
}
return u
}
func (n *regexNode ) reduceSet () *regexNode {
if n .set == nil {
n .t = ntNothing
} else if n .set .IsSingleton () {
n .ch = n .set .SingletonChar ()
n .set = nil
n .t += (ntOne - ntSet )
} else if n .set .IsSingletonInverse () {
n .ch = n .set .SingletonChar ()
n .set = nil
n .t += (ntNotone - ntSet )
}
return n
}
func (n *regexNode ) reverseLeft () *regexNode {
if n .options &RightToLeft != 0 && n .t == ntConcatenate && len (n .children ) > 0 {
for left , right := 0 , len (n .children )-1 ; left < right ; left , right = left +1 , right -1 {
n .children [left ], n .children [right ] = n .children [right ], n .children [left ]
}
}
return n
}
func (n *regexNode ) makeQuantifier (lazy bool , min , max int ) *regexNode {
if min == 0 && max == 0 {
return newRegexNode (ntEmpty , n .options )
}
if min == 1 && max == 1 {
return n
}
switch n .t {
case ntOne , ntNotone , ntSet :
if lazy {
n .makeRep (Onelazy , min , max )
} else {
n .makeRep (Oneloop , min , max )
}
return n
default :
var t nodeType
if lazy {
t = ntLazyloop
} else {
t = ntLoop
}
result := newRegexNodeMN (t , n .options , min , max )
result .addChild (n )
return result
}
}
var typeStr = []string {
"Onerep" , "Notonerep" , "Setrep" ,
"Oneloop" , "Notoneloop" , "Setloop" ,
"Onelazy" , "Notonelazy" , "Setlazy" ,
"One" , "Notone" , "Set" ,
"Multi" , "Ref" ,
"Bol" , "Eol" , "Boundary" , "Nonboundary" ,
"Beginning" , "Start" , "EndZ" , "End" ,
"Nothing" , "Empty" ,
"Alternate" , "Concatenate" ,
"Loop" , "Lazyloop" ,
"Capture" , "Group" , "Require" , "Prevent" , "Greedy" ,
"Testref" , "Testgroup" ,
"Unknown" , "Unknown" , "Unknown" ,
"Unknown" , "Unknown" , "Unknown" ,
"ECMABoundary" , "NonECMABoundary" ,
}
func (n *regexNode ) description () string {
buf := &bytes .Buffer {}
buf .WriteString (typeStr [n .t ])
if (n .options & ExplicitCapture ) != 0 {
buf .WriteString ("-C" )
}
if (n .options & IgnoreCase ) != 0 {
buf .WriteString ("-I" )
}
if (n .options & RightToLeft ) != 0 {
buf .WriteString ("-L" )
}
if (n .options & Multiline ) != 0 {
buf .WriteString ("-M" )
}
if (n .options & Singleline ) != 0 {
buf .WriteString ("-S" )
}
if (n .options & IgnorePatternWhitespace ) != 0 {
buf .WriteString ("-X" )
}
if (n .options & ECMAScript ) != 0 {
buf .WriteString ("-E" )
}
switch n .t {
case ntOneloop , ntNotoneloop , ntOnelazy , ntNotonelazy , ntOne , ntNotone :
buf .WriteString ("(Ch = " + CharDescription (n .ch ) + ")" )
break
case ntCapture :
buf .WriteString ("(index = " + strconv .Itoa (n .m ) + ", unindex = " + strconv .Itoa (n .n ) + ")" )
break
case ntRef , ntTestref :
buf .WriteString ("(index = " + strconv .Itoa (n .m ) + ")" )
break
case ntMulti :
fmt .Fprintf (buf , "(String = %s)" , string (n .str ))
break
case ntSet , ntSetloop , ntSetlazy :
buf .WriteString ("(Set = " + n .set .String () + ")" )
break
}
switch n .t {
case ntOneloop , ntNotoneloop , ntOnelazy , ntNotonelazy , ntSetloop , ntSetlazy , ntLoop , ntLazyloop :
buf .WriteString ("(Min = " )
buf .WriteString (strconv .Itoa (n .m ))
buf .WriteString (", Max = " )
if n .n == math .MaxInt32 {
buf .WriteString ("inf" )
} else {
buf .WriteString (strconv .Itoa (n .n ))
}
buf .WriteString (")" )
break
}
return buf .String ()
}
var padSpace = []byte (" " )
func (t *RegexTree ) Dump () string {
return t .root .dump ()
}
func (n *regexNode ) dump () string {
var stack []int
CurNode := n
CurChild := 0
buf := bytes .NewBufferString (CurNode .description ())
buf .WriteRune ('\n' )
for {
if CurNode .children != nil && CurChild < len (CurNode .children ) {
stack = append (stack , CurChild +1 )
CurNode = CurNode .children [CurChild ]
CurChild = 0
Depth := len (stack )
if Depth > 32 {
Depth = 32
}
buf .Write (padSpace [:Depth ])
buf .WriteString (CurNode .description ())
buf .WriteRune ('\n' )
} else {
if len (stack ) == 0 {
break
}
CurChild = stack [len (stack )-1 ]
stack = stack [:len (stack )-1 ]
CurNode = CurNode .next
}
}
return buf .String ()
}
The pages are generated with Golds v0.8.2 . (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu .
PR and bug reports are welcome and can be submitted to the issue list .
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds .