// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

import 

// For a description of ContractTrieSet, see text/collate/build/contract.go.

type ContractTrieSet []struct{ L, H, N, I uint8 }

// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
	states ContractTrieSet
	s      []byte
	n      int
	index  int
	pindex int
	done   bool
}

type ctScannerString struct {
	states ContractTrieSet
	s      string
	n      int
	index  int
	pindex int
	done   bool
}

func ( ContractTrieSet) (,  int,  []byte) ctScanner {
	return ctScanner{s: , states: [:], n: }
}

func ( ContractTrieSet) (,  int,  string) ctScannerString {
	return ctScannerString{s: , states: [:], n: }
}

// result returns the offset i and bytes consumed p so far.  If no suffix
// matched, i and p will be 0.
func ( *ctScanner) () (,  int) {
	return .index, .pindex
}

func ( *ctScannerString) () (,  int) {
	return .index, .pindex
}

const (
	final   = 0
	noIndex = 0xFF
)

// scan matches the longest suffix at the current location in the input
// and returns the number of bytes consumed.
func ( *ctScanner) ( int) int {
	 :=  // the p at the rune start
	 := .s
	,  := .states, .n
	for  := 0;  <  &&  < len(); {
		 := []
		 := []
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. We could let
		// a negative value of e.n mean that we can set s.done = true and avoid
		// the need for additional matches.
		if  >= .L {
			if .L ==  {
				++
				if .I != noIndex {
					.index = int(.I)
					.pindex = 
				}
				if .N != final {
					, ,  = 0, [int(.H)+:], int(.N)
					if  >= len() || utf8.RuneStart([]) {
						.states, .n,  = , , 
					}
				} else {
					.done = true
					return 
				}
				continue
			} else if .N == final &&  <= .H {
				++
				.done = true
				.index = int(-.L) + int(.I)
				.pindex = 
				return 
			}
		}
		++
	}
	return 
}

// scan is a verbatim copy of ctScanner.scan.
func ( *ctScannerString) ( int) int {
	 :=  // the p at the rune start
	 := .s
	,  := .states, .n
	for  := 0;  <  &&  < len(); {
		 := []
		 := []
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. We could let
		// a negative value of e.n mean that we can set s.done = true and avoid
		// the need for additional matches.
		if  >= .L {
			if .L ==  {
				++
				if .I != noIndex {
					.index = int(.I)
					.pindex = 
				}
				if .N != final {
					, ,  = 0, [int(.H)+:], int(.N)
					if  >= len() || utf8.RuneStart([]) {
						.states, .n,  = , , 
					}
				} else {
					.done = true
					return 
				}
				continue
			} else if .N == final &&  <= .H {
				++
				.done = true
				.index = int(-.L) + int(.I)
				.pindex = 
				return 
			}
		}
		++
	}
	return 
}