package graphemes

import (
	

	
)

// is reports whether two property values intersect, i.e. whether their
// bitwise AND is non-zero. Because the test is "any bit in common", an
// argument built by OR-ing several properties together (as call sites in
// this file do, e.g. _Control | _CR | _LF) matches if any one of them is set.
func ( property) ( property) bool {
	return ( & ) != 0
}

// _Ignore is the property that splitFunc tests for when deciding whether to
// update its remembered "previous" properties: the update only happens when
// the tested property does not intersect _Ignore. It is defined as _Extend.
const _Ignore = _Extend

// SplitFunc is a bufio.SplitFunc implementation of Unicode grapheme cluster segmentation, for use with bufio.Scanner.
// It is the []byte instantiation of the generic splitFunc.
//
// See https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
var SplitFunc bufio.SplitFunc = splitFunc[[]byte]

// splitFunc is the generic implementation behind SplitFunc; its type
// parameter is constrained by iterators.Stringish, and SplitFunc uses the
// []byte instantiation.
//
// It has the shape of a bufio.SplitFunc: it takes the input and a bool, and
// returns an int, a token of the input type, and an error. Every return path
// in this function returns a nil error. The paths commented "request more"
// return 0 together with the zero-value token.
//
// Each call decides a single boundary: after consuming the first rune, the
// loop keeps extending the token while one of the UAX #29 grapheme cluster
// rules (GB3..GB13, linked inline) says "do not break", and stops at the
// first position where none of them applies.
func splitFunc[ iterators.Stringish]( ,  bool) ( int,  ,  error) {
	// Zero value of the token type; returned whenever there is no token to emit.
	var  
	// Empty input: nothing to scan.
	if len() == 0 {
		return 0, , nil
	}

	// These vars are stateful across loop iterations
	var  int
	var  property = 0     // "last excluding ignored categories"
	var  property = 0 // "last one before that"
	var  int

	// Rules are usually of the form Cat1 × Cat2; "current" refers to the first property
	// to the right of the ×, from which we look back or forward

	// Look up the first rune. A zero result is treated below as "no complete
	// rune available" (presumably the rune width — confirm against lookup).
	,  := lookup([:])
	if  == 0 {
		if ! {
			// Rune extends past current data, request more
			return 0, , nil
		}
		// No more data is coming: hand back everything up to len() as the token.
		 = len()
		return , [:], nil
	}

	// https://unicode.org/reports/tr29/#GB1
	// Start of text always advances
	 += 

	for {
		 :=  == len() // "end of text"

		if  {
			if ! {
				// Token extends past current data, request more
				return 0, , nil
			}

			// https://unicode.org/reports/tr29/#GB2
			break
		}

		/*
			We've switched the evaluation order of GB1↓ and GB2↑. It's ok:
			because we've checked for len(data) at the top of this function,
			sot and eot are mutually exclusive, order doesn't matter.
		*/

		// Rules are usually of the form Cat1 × Cat2; "current" refers to the first property
		// to the right of the ×, from which we look back or forward

		// Remember previous properties to avoid lookups/lookbacks
		 := 
		// Only properties that do not intersect _Ignore update the remembered
		// state, so ignored properties are skipped over when the rules below
		// look back.
		if !.is(_Ignore) {
			 = 
			 = 
		}

		// Look up the next rune; as above, a zero result means no complete
		// rune could be read from the remaining data.
		,  = lookup([:])
		if  == 0 {
			if  {
				// Just return the bytes, we can't do anything with them
				 = len()
				break
			}
			// Rune extends past current data, request more
			return 0, , nil
		}

		// Optimization: no rule can possibly apply
		if | == 0 { // i.e. both are zero
			break
		}

		// https://unicode.org/reports/tr29/#GB3
		// Do not break within CR LF.
		if .is(_LF) && .is(_CR) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB4
		// https://unicode.org/reports/tr29/#GB5
		// The two properties are OR-ed together, so a Control, CR or LF on
		// either side of the boundary forces a break.
		if ( | ).is(_Control | _CR | _LF) {
			break
		}

		// https://unicode.org/reports/tr29/#GB6
		// Hangul syllable sequences (GB6 through GB8) are kept together.
		if .is(_L|_V|_LV|_LVT) && .is(_L) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB7
		if .is(_V|_T) && .is(_LV|_V) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB8
		if .is(_T) && .is(_LVT|_T) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB9
		// Do not break before Extend or ZWJ.
		if .is(_Extend | _ZWJ) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB9a
		// Do not break before SpacingMark.
		if .is(_SpacingMark) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB9b
		// Do not break after Prepend.
		if .is(_Prepend) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB9c
		// TODO(clipperhouse):
		// It appears to be added in Unicode 15.1.0:
		// https://unicode.org/versions/Unicode15.1.0/#Migration
		// This package currently supports Unicode 15.0.0, so
		// out of scope for now

		// https://unicode.org/reports/tr29/#GB11
		// Three-way check: ExtendedPictographic, ZWJ, ExtendedPictographic.
		// The two remembered "last" properties supply the look-back, which is
		// why ignored properties are not recorded in them.
		if .is(_ExtendedPictographic) && .is(_ZWJ) && .is(_ExtendedPictographic) {
			 += 
			continue
		}

		// https://unicode.org/reports/tr29/#GB12
		// https://unicode.org/reports/tr29/#GB13
		// The two properties are AND-ed, so this only fires when both sides
		// are RegionalIndicator. A counter is bumped for each such adjacency
		// and the token is extended only when the count is odd, which keeps
		// regional indicators grouped in pairs.
		if ( & ).is(_RegionalIndicator) {
			++

			 := %2 == 1
			if  {
				 += 
				continue
			}
		}

		// If we fall through all the above rules, it's a grapheme cluster break
		break
	}

	// Return token
	return , [:], nil
}