package syntax

import (
	
	
	
	
)

type RegexTree struct {
	root       *regexNode
	caps       map[int]int
	capnumlist []int
	captop     int
	Capnames   map[string]int
	Caplist    []string
	options    RegexOptions
}

// It is built into a parsed tree for a regular expression.

// Implementation notes:
//
// Since the node tree is a temporary data structure only used
// during compilation of the regexp to integer codes, it's
// designed for clarity and convenience rather than
// space efficiency.
//
// RegexNodes are built into a tree, linked by the n.children list.
// Each node also has a n.parent and n.ichild member indicating
// its parent and which child # it is in its parent's list.
//
// RegexNodes come in as many types as there are constructs in
// a regular expression, for example, "concatenate", "alternate",
// "one", "rept", "group". There are also node types for basic
// peephole optimizations, e.g., "onerep", "notsetrep", etc.
//
// Because perl 5 allows "lookback" groups that scan backwards,
// each node also gets a "direction". Normally the value of
// boolean n.backward = false.
//
// During parsing, top-level nodes are also stacked onto a parse
// stack (a stack of trees). For this purpose we have a n.next
// pointer. [Note that to save a few bytes, we could overload the
// n.parent pointer instead.]
//
// On the parse stack, each tree has a "role" - basically, the
// nonterminal in the grammar that the parser has currently
// assigned to the tree. That code is stored in n.role.
//
// Finally, some of the different kinds of nodes have data.
// Two integers (for the looping constructs) are stored in
// n.operands, an an object (either a string or a set)
// is stored in n.data
type regexNode struct {
	t        nodeType
	children []*regexNode
	str      []rune
	set      *CharSet
	ch       rune
	m        int
	n        int
	options  RegexOptions
	next     *regexNode
}

type nodeType int32

const (
	// The following are leaves, and correspond to primitive operations

	ntOnerep      nodeType = 0  // lef,back char,min,max    a {n}
	ntNotonerep            = 1  // lef,back char,min,max    .{n}
	ntSetrep               = 2  // lef,back set,min,max     [\d]{n}
	ntOneloop              = 3  // lef,back char,min,max    a {,n}
	ntNotoneloop           = 4  // lef,back char,min,max    .{,n}
	ntSetloop              = 5  // lef,back set,min,max     [\d]{,n}
	ntOnelazy              = 6  // lef,back char,min,max    a {,n}?
	ntNotonelazy           = 7  // lef,back char,min,max    .{,n}?
	ntSetlazy              = 8  // lef,back set,min,max     [\d]{,n}?
	ntOne                  = 9  // lef      char            a
	ntNotone               = 10 // lef      char            [^a]
	ntSet                  = 11 // lef      set             [a-z\s]  \w \s \d
	ntMulti                = 12 // lef      string          abcd
	ntRef                  = 13 // lef      group           \#
	ntBol                  = 14 //                          ^
	ntEol                  = 15 //                          $
	ntBoundary             = 16 //                          \b
	ntNonboundary          = 17 //                          \B
	ntBeginning            = 18 //                          \A
	ntStart                = 19 //                          \G
	ntEndZ                 = 20 //                          \Z
	ntEnd                  = 21 //                          \Z

	// Interior nodes do not correspond to primitive operations, but
	// control structures compositing other operations

	// Concat and alternate take n children, and can run forward or backwards

	ntNothing     = 22 //          []
	ntEmpty       = 23 //          ()
	ntAlternate   = 24 //          a|b
	ntConcatenate = 25 //          ab
	ntLoop        = 26 // m,x      * + ? {,}
	ntLazyloop    = 27 // m,x      *? +? ?? {,}?
	ntCapture     = 28 // n        ()
	ntGroup       = 29 //          (?:)
	ntRequire     = 30 //          (?=) (?<=)
	ntPrevent     = 31 //          (?!) (?<!)
	ntGreedy      = 32 //          (?>) (?<)
	ntTestref     = 33 //          (?(n) | )
	ntTestgroup   = 34 //          (?(...) | )

	ntECMABoundary    = 41 //                          \b
	ntNonECMABoundary = 42 //                          \B
)

func newRegexNode( nodeType,  RegexOptions) *regexNode {
	return &regexNode{
		t:       ,
		options: ,
	}
}

func newRegexNodeCh( nodeType,  RegexOptions,  rune) *regexNode {
	return &regexNode{
		t:       ,
		options: ,
		ch:      ,
	}
}

func newRegexNodeStr( nodeType,  RegexOptions,  []rune) *regexNode {
	return &regexNode{
		t:       ,
		options: ,
		str:     ,
	}
}

func newRegexNodeSet( nodeType,  RegexOptions,  *CharSet) *regexNode {
	return &regexNode{
		t:       ,
		options: ,
		set:     ,
	}
}

func newRegexNodeM( nodeType,  RegexOptions,  int) *regexNode {
	return &regexNode{
		t:       ,
		options: ,
		m:       ,
	}
}
func newRegexNodeMN( nodeType,  RegexOptions, ,  int) *regexNode {
	return &regexNode{
		t:       ,
		options: ,
		m:       ,
		n:       ,
	}
}

func ( *regexNode) ( *bytes.Buffer) {
	for  := 0;  < len(.str); ++ {
		.WriteRune(.str[])
	}
}

func ( *regexNode) ( *regexNode) {
	 := .reduce()
	.children = append(.children, )
	.next = 
}

func ( *regexNode) ( int,  []*regexNode) {
	 := make([]*regexNode, 0, len(.children)+len())
	.children = append(append(append(, .children[:]...), ...), .children[:]...)
}

// removes children including the start but not the end index
func ( *regexNode) (,  int) {
	.children = append(.children[:], .children[:]...)
}

// Pass type as OneLazy or OneLoop
func ( *regexNode) ( nodeType, ,  int) {
	.t += ( - ntOne)
	.m = 
	.n = 
}

func ( *regexNode) () *regexNode {
	switch .t {
	case ntAlternate:
		return .reduceAlternation()

	case ntConcatenate:
		return .reduceConcatenation()

	case ntLoop, ntLazyloop:
		return .reduceRep()

	case ntGroup:
		return .reduceGroup()

	case ntSet, ntSetloop:
		return .reduceSet()

	default:
		return 
	}
}

// Basic optimization. Single-letter alternations can be replaced
// by faster set specifications, and nested alternations with no
// intervening operators can be flattened:
//
// a|b|c|def|g|h -> [a-c]|def|[gh]
// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
func ( *regexNode) () *regexNode {
	if len(.children) == 0 {
		return newRegexNode(ntNothing, .options)
	}

	 := false
	 := false
	var  RegexOptions
	var ,  int

	for ,  = 0, 0;  < len(.children); ,  = +1, +1 {
		 := .children[]

		if  <  {
			.children[] = 
		}

		for {
			if .t == ntAlternate {
				for  := 0;  < len(.children); ++ {
					.children[].next = 
				}
				.insertChildren(+1, .children)

				--
			} else if .t == ntSet || .t == ntOne {
				// Cannot merge sets if L or I options differ, or if either are negated.
				 := .options & (RightToLeft | IgnoreCase)

				if .t == ntSet {
					if ! ||  !=  ||  || !.set.IsMergeable() {
						 = true
						 = !.set.IsMergeable()
						 = 
						break
					}
				} else if ! ||  !=  ||  {
					 = true
					 = false
					 = 
					break
				}

				// The last node was a Set or a One, we're a Set or One and our options are the same.
				// Merge the two nodes.
				--
				 := .children[]

				var  *CharSet
				if .t == ntOne {
					 = &CharSet{}
					.addChar(.ch)
				} else {
					 = .set
				}

				if .t == ntOne {
					.addChar(.ch)
				} else {
					.addSet(*.set)
				}

				.t = ntSet
				.set = 
			} else if .t == ntNothing {
				--
			} else {
				 = false
				 = false
			}
			break
		}
	}

	if  <  {
		.removeChildren(, )
	}

	return .stripEnation(ntNothing)
}

// Basic optimization. Adjacent strings can be concatenated.
//
// (?:abc)(?:def) -> abcdef
func ( *regexNode) () *regexNode {
	// Eliminate empties and concat adjacent strings/chars

	var  RegexOptions
	var  RegexOptions
	var ,  int

	if len(.children) == 0 {
		return newRegexNode(ntEmpty, .options)
	}

	 := false

	for ,  = 0, 0;  < len(.children); ,  = +1, +1 {
		var ,  *regexNode

		 = .children[]

		if  <  {
			.children[] = 
		}

		if .t == ntConcatenate &&
			((.options & RightToLeft) == (.options & RightToLeft)) {
			for  := 0;  < len(.children); ++ {
				.children[].next = 
			}

			//insert at.children at i+1 index in n.children
			.insertChildren(+1, .children)

			--
		} else if .t == ntMulti || .t == ntOne {
			// Cannot merge strings if L or I options differ
			 = .options & (RightToLeft | IgnoreCase)

			if ! ||  !=  {
				 = true
				 = 
				continue
			}

			--
			 = .children[]

			if .t == ntOne {
				.t = ntMulti
				.str = []rune{.ch}
			}

			if ( & RightToLeft) == 0 {
				if .t == ntOne {
					.str = append(.str, .ch)
				} else {
					.str = append(.str, .str...)
				}
			} else {
				if .t == ntOne {
					// insert at the front by expanding our slice, copying the data over, and then setting the value
					.str = append(.str, 0)
					copy(.str[1:], .str)
					.str[0] = .ch
				} else {
					//insert at the front...this one we'll make a new slice and copy both into it
					 := make([]rune, len(.str)+len(.str))
					copy(, .str)
					copy([len(.str):], .str)
					.str = 
				}
			}
		} else if .t == ntEmpty {
			--
		} else {
			 = false
		}
	}

	if  <  {
		// remove indices j through i from the children
		.removeChildren(, )
	}

	return .stripEnation(ntEmpty)
}

// Nested repeaters just get multiplied with each other if they're not
// too lumpy
func ( *regexNode) () *regexNode {

	 := 
	 := .t
	 := .m
	 := .n

	for {
		if len(.children) == 0 {
			break
		}

		 := .children[0]

		// multiply reps of the same type only
		if .t !=  {
			 := .t

			if !( >= ntOneloop &&  <= ntSetloop &&  == ntLoop ||
				 >= ntOnelazy &&  <= ntSetlazy &&  == ntLazyloop) {
				break
			}
		}

		// child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
		// [but things like (a {2,})+ are not too lumpy...]
		if .m == 0 && .m > 1 || .n < .m*2 {
			break
		}

		 = 
		if .m > 0 {
			if (math.MaxInt32-1)/.m <  {
				.m = math.MaxInt32
			} else {
				.m = .m * 
			}
		}
		if .n > 0 {
			if (math.MaxInt32-1)/.n <  {
				.n = math.MaxInt32
			} else {
				.n = .n * 
			}
		}
	}

	if math.MaxInt32 ==  {
		return newRegexNode(ntNothing, .options)
	}
	return 

}

// Simple optimization. If a concatenation or alternation has only
// one child strip out the intermediate node. If it has zero children,
// turn it into an empty.
func ( *regexNode) ( nodeType) *regexNode {
	switch len(.children) {
	case 0:
		return newRegexNode(, .options)
	case 1:
		return .children[0]
	default:
		return 
	}
}

func ( *regexNode) () *regexNode {
	 := 

	for .t == ntGroup {
		 = .children[0]
	}

	return 
}

// Simple optimization. If a set is a singleton, an inverse singleton,
// or empty, it's transformed accordingly.
func ( *regexNode) () *regexNode {
	// Extract empty-set, one and not-one case as special

	if .set == nil {
		.t = ntNothing
	} else if .set.IsSingleton() {
		.ch = .set.SingletonChar()
		.set = nil
		.t += (ntOne - ntSet)
	} else if .set.IsSingletonInverse() {
		.ch = .set.SingletonChar()
		.set = nil
		.t += (ntNotone - ntSet)
	}

	return 
}

func ( *regexNode) () *regexNode {
	if .options&RightToLeft != 0 && .t == ntConcatenate && len(.children) > 0 {
		//reverse children order
		for ,  := 0, len(.children)-1;  < ; ,  = +1, -1 {
			.children[], .children[] = .children[], .children[]
		}
	}

	return 
}

func ( *regexNode) ( bool, ,  int) *regexNode {
	if  == 0 &&  == 0 {
		return newRegexNode(ntEmpty, .options)
	}

	if  == 1 &&  == 1 {
		return 
	}

	switch .t {
	case ntOne, ntNotone, ntSet:
		if  {
			.makeRep(Onelazy, , )
		} else {
			.makeRep(Oneloop, , )
		}
		return 

	default:
		var  nodeType
		if  {
			 = ntLazyloop
		} else {
			 = ntLoop
		}
		 := newRegexNodeMN(, .options, , )
		.addChild()
		return 
	}
}

// debug functions

var typeStr = []string{
	"Onerep", "Notonerep", "Setrep",
	"Oneloop", "Notoneloop", "Setloop",
	"Onelazy", "Notonelazy", "Setlazy",
	"One", "Notone", "Set",
	"Multi", "Ref",
	"Bol", "Eol", "Boundary", "Nonboundary",
	"Beginning", "Start", "EndZ", "End",
	"Nothing", "Empty",
	"Alternate", "Concatenate",
	"Loop", "Lazyloop",
	"Capture", "Group", "Require", "Prevent", "Greedy",
	"Testref", "Testgroup",
	"Unknown", "Unknown", "Unknown",
	"Unknown", "Unknown", "Unknown",
	"ECMABoundary", "NonECMABoundary",
}

func ( *regexNode) () string {
	 := &bytes.Buffer{}

	.WriteString(typeStr[.t])

	if (.options & ExplicitCapture) != 0 {
		.WriteString("-C")
	}
	if (.options & IgnoreCase) != 0 {
		.WriteString("-I")
	}
	if (.options & RightToLeft) != 0 {
		.WriteString("-L")
	}
	if (.options & Multiline) != 0 {
		.WriteString("-M")
	}
	if (.options & Singleline) != 0 {
		.WriteString("-S")
	}
	if (.options & IgnorePatternWhitespace) != 0 {
		.WriteString("-X")
	}
	if (.options & ECMAScript) != 0 {
		.WriteString("-E")
	}

	switch .t {
	case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
		.WriteString("(Ch = " + CharDescription(.ch) + ")")
		break
	case ntCapture:
		.WriteString("(index = " + strconv.Itoa(.m) + ", unindex = " + strconv.Itoa(.n) + ")")
		break
	case ntRef, ntTestref:
		.WriteString("(index = " + strconv.Itoa(.m) + ")")
		break
	case ntMulti:
		fmt.Fprintf(, "(String = %s)", string(.str))
		break
	case ntSet, ntSetloop, ntSetlazy:
		.WriteString("(Set = " + .set.String() + ")")
		break
	}

	switch .t {
	case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
		.WriteString("(Min = ")
		.WriteString(strconv.Itoa(.m))
		.WriteString(", Max = ")
		if .n == math.MaxInt32 {
			.WriteString("inf")
		} else {
			.WriteString(strconv.Itoa(.n))
		}
		.WriteString(")")

		break
	}

	return .String()
}

var padSpace = []byte("                                ")

func ( *RegexTree) () string {
	return .root.dump()
}

func ( *regexNode) () string {
	var  []int
	 := 
	 := 0

	 := bytes.NewBufferString(.description())
	.WriteRune('\n')

	for {
		if .children != nil &&  < len(.children) {
			 = append(, +1)
			 = .children[]
			 = 0

			 := len()
			if  > 32 {
				 = 32
			}
			.Write(padSpace[:])
			.WriteString(.description())
			.WriteRune('\n')
		} else {
			if len() == 0 {
				break
			}

			 = [len()-1]
			 = [:len()-1]
			 = .next
		}
	}
	return .String()
}