package syntax

import (
	
	
	
	
	
)

type Prefix struct {
	PrefixStr       []rune
	PrefixSet       CharSet
	CaseInsensitive bool
}

// It takes a RegexTree and computes the set of chars that can start it.
func getFirstCharsPrefix( *RegexTree) *Prefix {
	 := regexFcd{
		fcStack:  make([]regexFc, 32),
		intStack: make([]int, 32),
	}
	 := .regexFCFromRegexTree()

	if  == nil || .nullable || .cc.IsEmpty() {
		return nil
	}
	 := .getFirstChars()
	return &Prefix{PrefixSet: , CaseInsensitive: .caseInsensitive}
}

type regexFcd struct {
	intStack        []int
	intDepth        int
	fcStack         []regexFc
	fcDepth         int
	skipAllChildren bool // don't process any more children at the current level
	skipchild       bool // don't process the current child.
	failed          bool
}

/*
 * The main FC computation. It does a shortcutted depth-first walk
 * through the tree and calls CalculateFC to emits code before
 * and after each child of an interior node, and at each leaf.
 */
func ( *regexFcd) ( *RegexTree) *regexFc {
	 := .root
	 := 0

	for {
		if len(.children) == 0 {
			// This is a leaf node
			.calculateFC(.t, , 0)
		} else if  < len(.children) && !.skipAllChildren {
			// This is an interior node, and we have more children to analyze
			.calculateFC(.t|beforeChild, , )

			if !.skipchild {
				 = .children[]
				// this stack is how we get a depth first walk of the tree.
				.pushInt()
				 = 0
			} else {
				++
				.skipchild = false
			}
			continue
		}

		// This is an interior node where we've finished analyzing all the children, or
		// the end of a leaf node.
		.skipAllChildren = false

		if .intIsEmpty() {
			break
		}

		 = .popInt()
		 = .next

		.calculateFC(.t|afterChild, , )
		if .failed {
			return nil
		}

		++
	}

	if .fcIsEmpty() {
		return nil
	}

	return .popFC()
}

// To avoid recursion, we use a simple integer stack.
// This is the push.
func ( *regexFcd) ( int) {
	if .intDepth >= len(.intStack) {
		 := make([]int, .intDepth*2)
		copy(, .intStack)
		.intStack = 
	}

	.intStack[.intDepth] = 
	.intDepth++
}

// True if the stack is empty.
func ( *regexFcd) () bool {
	return .intDepth == 0
}

// This is the pop.
func ( *regexFcd) () int {
	.intDepth--
	return .intStack[.intDepth]
}

// We also use a stack of RegexFC objects.
// This is the push.
func ( *regexFcd) ( regexFc) {
	if .fcDepth >= len(.fcStack) {
		 := make([]regexFc, .fcDepth*2)
		copy(, .fcStack)
		.fcStack = 
	}

	.fcStack[.fcDepth] = 
	.fcDepth++
}

// True if the stack is empty.
func ( *regexFcd) () bool {
	return .fcDepth == 0
}

// This is the pop.
func ( *regexFcd) () *regexFc {
	.fcDepth--
	return &.fcStack[.fcDepth]
}

// This is the top.
func ( *regexFcd) () *regexFc {
	return &.fcStack[.fcDepth-1]
}

// Called in Beforechild to prevent further processing of the current child
func ( *regexFcd) () {
	.skipchild = true
}

// FC computation and shortcut cases for each node type
func ( *regexFcd) ( nodeType,  *regexNode,  int) {
	//fmt.Printf("NodeType: %v, CurIndex: %v, Desc: %v\n", nt, CurIndex, node.description())
	 := false
	 := false

	if  <= ntRef {
		if (.options & IgnoreCase) != 0 {
			 = true
		}
		if (.options & RightToLeft) != 0 {
			 = true
		}
	}

	switch  {
	case ntConcatenate | beforeChild, ntAlternate | beforeChild, ntTestref | beforeChild, ntLoop | beforeChild, ntLazyloop | beforeChild:
		break

	case ntTestgroup | beforeChild:
		if  == 0 {
			.skipChild()
		}
		break

	case ntEmpty:
		.pushFC(regexFc{nullable: true})
		break

	case ntConcatenate | afterChild:
		if  != 0 {
			 := .popFC()
			 := .topFC()

			.failed = !.addFC(*, true)
		}

		 := .topFC()
		if !.nullable {
			.skipAllChildren = true
		}
		break

	case ntTestgroup | afterChild:
		if  > 1 {
			 := .popFC()
			 := .topFC()

			.failed = !.addFC(*, false)
		}
		break

	case ntAlternate | afterChild, ntTestref | afterChild:
		if  != 0 {
			 := .popFC()
			 := .topFC()

			.failed = !.addFC(*, false)
		}
		break

	case ntLoop | afterChild, ntLazyloop | afterChild:
		if .m == 0 {
			 := .topFC()
			.nullable = true
		}
		break

	case ntGroup | beforeChild, ntGroup | afterChild, ntCapture | beforeChild, ntCapture | afterChild, ntGreedy | beforeChild, ntGreedy | afterChild:
		break

	case ntRequire | beforeChild, ntPrevent | beforeChild:
		.skipChild()
		.pushFC(regexFc{nullable: true})
		break

	case ntRequire | afterChild, ntPrevent | afterChild:
		break

	case ntOne, ntNotone:
		.pushFC(newRegexFc(.ch,  == ntNotone, false, ))
		break

	case ntOneloop, ntOnelazy:
		.pushFC(newRegexFc(.ch, false, .m == 0, ))
		break

	case ntNotoneloop, ntNotonelazy:
		.pushFC(newRegexFc(.ch, true, .m == 0, ))
		break

	case ntMulti:
		if len(.str) == 0 {
			.pushFC(regexFc{nullable: true})
		} else if ! {
			.pushFC(newRegexFc(.str[0], false, false, ))
		} else {
			.pushFC(newRegexFc(.str[len(.str)-1], false, false, ))
		}
		break

	case ntSet:
		.pushFC(regexFc{cc: .set.Copy(), nullable: false, caseInsensitive: })
		break

	case ntSetloop, ntSetlazy:
		.pushFC(regexFc{cc: .set.Copy(), nullable: .m == 0, caseInsensitive: })
		break

	case ntRef:
		.pushFC(regexFc{cc: *AnyClass(), nullable: true, caseInsensitive: false})
		break

	case ntNothing, ntBol, ntEol, ntBoundary, ntNonboundary, ntECMABoundary, ntNonECMABoundary, ntBeginning, ntStart, ntEndZ, ntEnd:
		.pushFC(regexFc{nullable: true})
		break

	default:
		panic(fmt.Sprintf("unexpected op code: %v", ))
	}
}

type regexFc struct {
	cc              CharSet
	nullable        bool
	caseInsensitive bool
}

func newRegexFc( rune, , ,  bool) regexFc {
	 := regexFc{
		caseInsensitive: ,
		nullable:        ,
	}
	if  {
		if  > 0 {
			.cc.addRange('\x00', -1)
		}
		if  < 0xFFFF {
			.cc.addRange(+1, utf8.MaxRune)
		}
	} else {
		.cc.addRange(, )
	}
	return 
}

func ( *regexFc) () CharSet {
	if .caseInsensitive {
		.cc.addLowercase()
	}

	return .cc
}

func ( *regexFc) ( regexFc,  bool) bool {
	if !.cc.IsMergeable() || !.cc.IsMergeable() {
		return false
	}

	if  {
		if !.nullable {
			return true
		}

		if !.nullable {
			.nullable = false
		}
	} else {
		if .nullable {
			.nullable = true
		}
	}

	.caseInsensitive = .caseInsensitive || .caseInsensitive
	.cc.addSet(.cc)

	return true
}

// This is a related computation: it takes a RegexTree and computes the
// leading substring if it sees one. It's quite trivial and gives up easily.
func getPrefix( *RegexTree) *Prefix {
	var  *regexNode
	 := 0

	 := .root

	for {
		switch .t {
		case ntConcatenate:
			if len(.children) > 0 {
				 = 
				 = 0
			}

		case ntGreedy, ntCapture:
			 = .children[0]
			 = nil
			continue

		case ntOneloop, ntOnelazy:
			if .m > 0 {
				return &Prefix{
					PrefixStr:       repeat(.ch, .m),
					CaseInsensitive: (.options & IgnoreCase) != 0,
				}
			}
			return nil

		case ntOne:
			return &Prefix{
				PrefixStr:       []rune{.ch},
				CaseInsensitive: (.options & IgnoreCase) != 0,
			}

		case ntMulti:
			return &Prefix{
				PrefixStr:       .str,
				CaseInsensitive: (.options & IgnoreCase) != 0,
			}

		case ntBol, ntEol, ntBoundary, ntECMABoundary, ntBeginning, ntStart,
			ntEndZ, ntEnd, ntEmpty, ntRequire, ntPrevent:

		default:
			return nil
		}

		if  == nil ||  >= len(.children) {
			return nil
		}

		 = .children[]
		++
	}
}

// repeat the rune r, c times... up to the max of MaxPrefixSize
func repeat( rune,  int) []rune {
	if  > MaxPrefixSize {
		 = MaxPrefixSize
	}

	 := make([]rune, )

	// binary growth using copy for speed
	[0] = 
	 := 1
	for  < len() {
		copy([:], [:])
		 *= 2
	}

	return 
}

// BmPrefix precomputes the Boyer-Moore
// tables for fast string scanning. These tables allow
// you to scan for the first occurrence of a string within
// a large body of text without examining every character.
// The performance of the heuristic depends on the actual
// string and the text being searched, but usually, the longer
// the string that is being searched for, the fewer characters
// need to be examined.
type BmPrefix struct {
	positive        []int
	negativeASCII   []int
	negativeUnicode [][]int
	pattern         []rune
	lowASCII        rune
	highASCII       rune
	rightToLeft     bool
	caseInsensitive bool
}

func newBmPrefix( []rune, ,  bool) *BmPrefix {

	 := &BmPrefix{
		rightToLeft:     ,
		caseInsensitive: ,
		pattern:         ,
	}

	if  {
		for  := 0;  < len(.pattern); ++ {
			// We do the ToLower character by character for consistency.  With surrogate chars, doing
			// a ToLower on the entire string could actually change the surrogate pair.  This is more correct
			// linguistically, but since Regex doesn't support surrogates, it's more important to be
			// consistent.

			.pattern[] = unicode.ToLower(.pattern[])
		}
	}

	var , ,  int
	var ,  int

	if ! {
		 = -1
		 = len(.pattern) - 1
		 = 1
	} else {
		 = len(.pattern)
		 = 0
		 = -1
	}

	// PART I - the good-suffix shift table
	//
	// compute the positive requirement:
	// if char "i" is the first one from the right that doesn't match,
	// then we know the matcher can advance by _positive[i].
	//
	// This algorithm is a simplified variant of the standard
	// Boyer-Moore good suffix calculation.

	.positive = make([]int, len(.pattern))

	 := 
	 := .pattern[]
	.positive[] = 
	 -= 

:
	for {
		// find an internal char (examine) that matches the tail

		for {
			if  ==  {
				break 
			}
			if .pattern[] ==  {
				break
			}
			 -= 
		}

		 = 
		 = 

		// find the length of the match
		for {
			if  ==  || .pattern[] != .pattern[] {
				// at the end of the match, note the difference in _positive
				// this is not the length of the match, but the distance from the internal match
				// to the tail suffix.
				if .positive[] == 0 {
					.positive[] =  - 
				}

				// System.Diagnostics.Debug.WriteLine("Set positive[" + match + "] to " + (match - scan));

				break
			}

			 -= 
			 -= 
		}

		 -= 
	}

	 =  - 

	// scan for the chars for which there are no shifts that yield a different candidate

	// The inside of the if statement used to say
	// "_positive[match] = last - beforefirst;"
	// This is slightly less aggressive in how much we skip, but at worst it
	// should mean a little more work rather than skipping a potential match.
	for  !=  {
		if .positive[] == 0 {
			.positive[] = 
		}

		 -= 
	}

	// PART II - the bad-character shift table
	//
	// compute the negative requirement:
	// if char "ch" is the reject character when testing position "i",
	// we can slide up by _negative[ch];
	// (_negative[ch] = str.Length - 1 - str.LastIndexOf(ch))
	//
	// the lookup table is divided into ASCII and Unicode portions;
	// only those parts of the Unicode 16-bit code set that actually
	// appear in the string are in the table. (Maximum size with
	// Unicode is 65K; ASCII only case is 512 bytes.)

	.negativeASCII = make([]int, 128)

	for  := 0;  < len(.negativeASCII); ++ {
		.negativeASCII[] =  - 
	}

	.lowASCII = 127
	.highASCII = 0

	for  = ;  != ;  -=  {
		 = .pattern[]

		switch {
		case  < 128:
			if .lowASCII >  {
				.lowASCII = 
			}

			if .highASCII <  {
				.highASCII = 
			}

			if .negativeASCII[] == - {
				.negativeASCII[] =  - 
			}
		case  <= 0xffff:
			,  := >>8, &0xFF

			if .negativeUnicode == nil {
				.negativeUnicode = make([][]int, 256)
			}

			if .negativeUnicode[] == nil {
				 := make([]int, 256)

				for  := 0;  < len(); ++ {
					[] =  - 
				}

				if  == 0 {
					copy(, .negativeASCII)
					//TODO: this line needed?
					.negativeASCII = 
				}

				.negativeUnicode[] = 
			}

			if .negativeUnicode[][] == - {
				.negativeUnicode[][] =  - 
			}
		default:
			// we can't do the filter because this algo doesn't support
			// unicode chars >0xffff
			return nil
		}
	}

	return 
}

func ( *BmPrefix) () string {
	return string(.pattern)
}

// Dump returns the contents of the filter as a human readable string
func ( *BmPrefix) ( string) string {
	 := &bytes.Buffer{}

	fmt.Fprintf(, "%sBM Pattern: %s\n%sPositive: ", , string(.pattern), )
	for  := 0;  < len(.positive); ++ {
		.WriteString(strconv.Itoa(.positive[]))
		.WriteRune(' ')
	}
	.WriteRune('\n')

	if .negativeASCII != nil {
		.WriteString()
		.WriteString("Negative table\n")
		for  := 0;  < len(.negativeASCII); ++ {
			if .negativeASCII[] != len(.pattern) {
				fmt.Fprintf(, "%s  %s %s\n", , Escape(string(rune())), strconv.Itoa(.negativeASCII[]))
			}
		}
	}

	return .String()
}

// Scan uses the Boyer-Moore algorithm to find the first occurrence
// of the specified string within text, beginning at index, and
// constrained within beglimit and endlimit.
//
// The direction and case-sensitivity of the match is determined
// by the arguments to the RegexBoyerMoore constructor.
func ( *BmPrefix) ( []rune, , ,  int) int {
	var (
		, ,          int
		, ,  int
		,                int
		                      rune
		               []int
	)

	if !.rightToLeft {
		 = len(.pattern)
		 = len(.pattern) - 1
		 = 0
		 =  +  - 1
		 = 1
	} else {
		 = -len(.pattern)
		 = 0
		 = - - 1
		 =  + 
		 = -1
	}

	 := .pattern[]

	for {
		if  >=  ||  <  {
			return -1
		}

		 = []

		if .caseInsensitive {
			 = unicode.ToLower()
		}

		if  !=  {
			if  < 128 {
				 = .negativeASCII[]
			} else if  < 0xffff && len(.negativeUnicode) > 0 {
				 = .negativeUnicode[>>8]
				if len() > 0 {
					 = [&0xFF]
				} else {
					 = 
				}
			} else {
				 = 
			}

			 += 
		} else { // if (chTest == chMatch)
			 = 
			 = 

			for {
				if  ==  {
					if .rightToLeft {
						return  + 1
					} else {
						return 
					}
				}

				 -= 
				 -= 

				 = []

				if .caseInsensitive {
					 = unicode.ToLower()
				}

				if  != .pattern[] {
					 = .positive[]
					if  < 128 {
						 = ( - ) + .negativeASCII[]
					} else if  < 0xffff && len(.negativeUnicode) > 0 {
						 = .negativeUnicode[>>8]
						if len() > 0 {
							 = ( - ) + [&0xFF]
						} else {
							 += 
							break
						}
					} else {
						 += 
						break
					}

					if .rightToLeft {
						if  <  {
							 = 
						}
					} else if  >  {
						 = 
					}

					 += 
					break
				}
			}
		}
	}
}

// When a regex is anchored, we can do a quick IsMatch test instead of a Scan
func ( *BmPrefix) ( []rune, , ,  int) bool {
	if !.rightToLeft {
		if  <  || - < len(.pattern) {
			return false
		}

		return .matchPattern(, )
	} else {
		if  >  || - < len(.pattern) {
			return false
		}

		return .matchPattern(, -len(.pattern))
	}
}

func ( *BmPrefix) ( []rune,  int) bool {
	if len()- < len(.pattern) {
		return false
	}

	if .caseInsensitive {
		for  := 0;  < len(.pattern); ++ {
			//Debug.Assert(textinfo.ToLower(_pattern[i]) == _pattern[i], "pattern should be converted to lower case in constructor!");
			if unicode.ToLower([+]) != .pattern[] {
				return false
			}
		}
		return true
	} else {
		for  := 0;  < len(.pattern); ++ {
			if [+] != .pattern[] {
				return false
			}
		}
		return true
	}
}

type AnchorLoc int16

// where the regex can be pegged
const (
	AnchorBeginning    AnchorLoc = 0x0001
	AnchorBol                    = 0x0002
	AnchorStart                  = 0x0004
	AnchorEol                    = 0x0008
	AnchorEndZ                   = 0x0010
	AnchorEnd                    = 0x0020
	AnchorBoundary               = 0x0040
	AnchorECMABoundary           = 0x0080
)

func getAnchors( *RegexTree) AnchorLoc {

	var  *regexNode
	,  := 0, AnchorLoc(0)

	 := .root

	for {
		switch .t {
		case ntConcatenate:
			if len(.children) > 0 {
				 = 
				 = 0
			}

		case ntGreedy, ntCapture:
			 = .children[0]
			 = nil
			continue

		case ntBol, ntEol, ntBoundary, ntECMABoundary, ntBeginning,
			ntStart, ntEndZ, ntEnd:
			return  | anchorFromType(.t)

		case ntEmpty, ntRequire, ntPrevent:

		default:
			return 
		}

		if  == nil ||  >= len(.children) {
			return 
		}

		 = .children[]
		++
	}
}

func anchorFromType( nodeType) AnchorLoc {
	switch  {
	case ntBol:
		return AnchorBol
	case ntEol:
		return AnchorEol
	case ntBoundary:
		return AnchorBoundary
	case ntECMABoundary:
		return AnchorECMABoundary
	case ntBeginning:
		return AnchorBeginning
	case ntStart:
		return AnchorStart
	case ntEndZ:
		return AnchorEndZ
	case ntEnd:
		return AnchorEnd
	default:
		return 0
	}
}

// anchorDescription returns a human-readable description of the anchors
func ( AnchorLoc) () string {
	 := &bytes.Buffer{}

	if 0 != ( & AnchorBeginning) {
		.WriteString(", Beginning")
	}
	if 0 != ( & AnchorStart) {
		.WriteString(", Start")
	}
	if 0 != ( & AnchorBol) {
		.WriteString(", Bol")
	}
	if 0 != ( & AnchorBoundary) {
		.WriteString(", Boundary")
	}
	if 0 != ( & AnchorECMABoundary) {
		.WriteString(", ECMABoundary")
	}
	if 0 != ( & AnchorEol) {
		.WriteString(", Eol")
	}
	if 0 != ( & AnchorEnd) {
		.WriteString(", End")
	}
	if 0 != ( & AnchorEndZ) {
		.WriteString(", EndZ")
	}

	// trim off comma
	if .Len() >= 2 {
		return .String()[2:]
	}
	return "None"
}