package chroma

import (
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"github.com/dlclark/regexp2"
)

// A Rule is the fundamental matching unit of the Regex lexer state machine.
type Rule struct {
	Pattern string
	Type    Emitter
	Mutator Mutator
}

// Words creates a regex that matches any of the given literal words.
func Words(prefix, suffix string, words ...string) string {
	// Sort longest-first so the generated alternation prefers the longest match.
	sort.Slice(words, func(i, j int) bool {
		return len(words[j]) < len(words[i])
	})
	for i, word := range words {
		words[i] = regexp.QuoteMeta(word)
	}
	return prefix + `(` + strings.Join(words, `|`) + `)` + suffix
}
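
// Illustrative usage (not part of the original source; the keyword list and `\b`
// boundaries are examples only):
//
//	Words(`\b`, `\b`, "if", "else", "return") // => `\b(return|else|if)\b`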

// Tokenise text using lexer, returning tokens as a slice.
func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, error) {
	var out []Token
	it, err := lexer.Tokenise(options, text)
	if err != nil {
		return nil, err
	}
	for t := it(); t != EOF; t = it() {
		out = append(out, t)
	}
	return out, nil
}
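
// Illustrative usage (assumed, not from the original source; "lexer" and "source"
// are placeholders for any Lexer and input string):
//
//	tokens, err := Tokenise(lexer, nil, source)
//	if err != nil {
//		// handle error
//	}
//	for _, t := range tokens {
//		fmt.Println(t.Type, t.Value)
//	}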

// Rules maps from state to a sequence of Rules.
type Rules map[string][]Rule

// Rename clones the Rules, then renames one rule in the clone.
func (r Rules) Rename(oldRule, newRule string) Rules {
	r = r.Clone()
	r[newRule] = r[oldRule]
	delete(r, oldRule)
	return r
}

// Clone returns a clone of the Rules.
func (r Rules) Clone() Rules {
	out := map[string][]Rule{}
	for key, rules := range r {
		out[key] = make([]Rule, len(rules))
		copy(out[key], rules)
	}
	return out
}

// Merge creates a clone of "r" then merges "rules" into the clone.
func (r Rules) Merge(rules Rules) Rules {
	out := r.Clone()
	for k, v := range rules.Clone() {
		out[k] = v
	}
	return out
}
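
// Note that Merge replaces matching states wholesale rather than appending rules:
// if both maps define "root", the result's "root" is the incoming one. Illustrative
// sketch ("base", the pattern, and the token type are examples only):
//
//	merged := base.Merge(Rules{"root": {{`\d+`, LiteralNumber, nil}}})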

// MustNewLexer creates a new Lexer with deferred rules generation or panics.
func MustNewLexer(config *Config, rules func() Rules) *RegexLexer {
	lexer, err := NewLexer(config, rules)
	if err != nil {
		panic(err)
	}
	return lexer
}

// NewLexer creates a new regex-based Lexer.
//
// "rules" is a state machine transition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules func() Rules) (*RegexLexer, error) {
	if config == nil {
		config = &Config{}
	}
	for _, glob := range append(config.Filenames, config.AliasFilenames...) {
		_, err := filepath.Match(glob, "")
		if err != nil {
			return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
		}
	}
	r := &RegexLexer{
		config:         config,
		fetchRulesFunc: func() (Rules, error) { return rules(), nil },
	}
	// One-off code to generate XML lexers in the Chroma source tree.
	// var nameCleanRe = regexp.MustCompile(`[^-+A-Za-z0-9_]`)
	// name := strings.ToLower(nameCleanRe.ReplaceAllString(config.Name, "_"))
	// data, err := Marshal(r)
	// if err != nil {
	// 	if errors.Is(err, ErrNotSerialisable) {
	// 		fmt.Fprintf(os.Stderr, "warning: %q: %s\n", name, err)
	// 		return r, nil
	// 	}
	// 	return nil, err
	// }
	// _, file, _, ok := runtime.Caller(2)
	// if !ok {
	// 	panic("??")
	// }
	// fmt.Println(file)
	// if strings.Contains(file, "/lexers/") {
	// 	dir := filepath.Join(filepath.Dir(file), "embedded")
	// 	err = os.MkdirAll(dir, 0700)
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// 	filename := filepath.Join(dir, name) + ".xml"
	// 	fmt.Println(filename)
	// 	err = ioutil.WriteFile(filename, data, 0600)
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// }
	return r, nil
}
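
// Illustrative sketch (assumed, not part of this file): a minimal lexer built with a
// deferred Rules function. The state names, patterns, and token types below are
// examples only.
//
//	var exampleLexer = MustNewLexer(&Config{Name: "Example"}, func() Rules {
//		return Rules{
//			"root": {
//				{`\s+`, Whitespace, nil},
//				{`"`, LiteralString, Push("string")},
//				{`\w+`, Name, nil},
//			},
//			"string": {
//				{`"`, LiteralString, Pop(1)},
//				{`[^"]+`, LiteralString, nil},
//			},
//		}
//	})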

// Trace enables debug tracing.
func (r *RegexLexer) Trace(trace bool) *RegexLexer {
	r.trace = trace
	return r
}

// A CompiledRule is a Rule with a pre-compiled regex.
//
// Note that regular expressions are lazily compiled on first use of the lexer.
type CompiledRule struct {
	Rule
	Regexp *regexp2.Regexp
	flags  string
}

// CompiledRules is a map of state name to the sequence of compiled rules in that state.
type CompiledRules map[string][]*CompiledRule

// LexerState contains the state for a single lex.
type LexerState struct {
	Lexer    *RegexLexer
	Registry *LexerRegistry
	Text     []rune
	Pos      int
	Rules    CompiledRules
	Stack    []string
	State    string
	Rule     int
	// Group matches.
	Groups []string
	// Named Group matches.
	NamedGroups map[string]string
	// Custom context for mutators.
	MutatorContext map[interface{}]interface{}
	iteratorStack  []Iterator
	options        *TokeniseOptions
	newlineAdded   bool
}

// Set mutator context.
func (l *LexerState) Set(key interface{}, value interface{}) {
	l.MutatorContext[key] = value
}

// Get mutator context.
func (l *LexerState) Get(key interface{}) interface{} {
	return l.MutatorContext[key]
}

// Iterator returns the next Token from the lexer.
func (l *LexerState) Iterator() Token { // nolint: gocognit
	end := len(l.Text)
	if l.newlineAdded {
		end--
	}
	for l.Pos < end && len(l.Stack) > 0 {
		// Exhaust the iterator stack, if any.
		for len(l.iteratorStack) > 0 {
			n := len(l.iteratorStack) - 1
			t := l.iteratorStack[n]()
			if t == EOF {
				l.iteratorStack = l.iteratorStack[:n]
				continue
			}
			return t
		}

		l.State = l.Stack[len(l.Stack)-1]
		if l.Lexer.trace {
			fmt.Fprintf(os.Stderr, "%s: pos=%d, text=%q\n", l.State, l.Pos, string(l.Text[l.Pos:]))
		}
		selectedRule, ok := l.Rules[l.State]
		if !ok {
			panic("unknown state " + l.State)
		}
		ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
		// No match.
		if groups == nil {
			// From Pygments :\
			//
			// If the RegexLexer encounters a newline that is flagged as an error token, the stack is
			// emptied and the lexer continues scanning in the 'root' state. This can help producing
			// error-tolerant highlighting for erroneous input, e.g. when a single-line string is not
			// closed.
			if l.Text[l.Pos] == '\n' && l.State != l.options.State {
				l.Stack = []string{l.options.State}
				continue
			}
			l.Pos++
			return Token{Error, string(l.Text[l.Pos-1 : l.Pos])}
		}
		l.Rule = ruleIndex
		l.Groups = groups
		l.NamedGroups = namedGroups
		l.Pos += utf8.RuneCountInString(groups[0])
		if rule.Mutator != nil {
			if err := rule.Mutator.Mutate(l); err != nil {
				panic(err)
			}
		}
		if rule.Type != nil {
			l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l))
		}
	}
	// Exhaust the IteratorStack, if any.
	// Duplicate code, but eh.
	for len(l.iteratorStack) > 0 {
		n := len(l.iteratorStack) - 1
		t := l.iteratorStack[n]()
		if t == EOF {
			l.iteratorStack = l.iteratorStack[:n]
			continue
		}
		return t
	}

	// If we get to here and we still have text, return it as an error.
	if l.Pos != len(l.Text) && len(l.Stack) == 0 {
		value := string(l.Text[l.Pos:])
		l.Pos = len(l.Text)
		return Token{Type: Error, Value: value}
	}
	return EOF
}

// RegexLexer is the default lexer implementation used in Chroma.
type RegexLexer struct {
	registry *LexerRegistry // The LexerRegistry this Lexer is associated with, if any.
	config   *Config
	analyser func(text string) float32
	trace    bool

	mu             sync.Mutex
	compiled       bool
	rawRules       Rules
	rules          map[string][]*CompiledRule
	fetchRulesFunc func() (Rules, error)
	compileOnce    sync.Once
}

func (r *RegexLexer) String() string {
	return r.config.Name
}

// Rules in the Lexer.
func (r *RegexLexer) Rules() (Rules, error) {
	if err := r.needRules(); err != nil {
		return nil, err
	}
	return r.rawRules, nil
}

// SetRegistry sets the registry this lexer will use to look up other lexers if necessary.
func (r *RegexLexer) SetRegistry(registry *LexerRegistry) Lexer {
	r.registry = registry
	return r
}

// SetAnalyser sets the analyser function used to perform content inspection.
func (r *RegexLexer) SetAnalyser(analyser func(text string) float32) Lexer {
	r.analyser = analyser
	return r
}

// AnalyseText scores how likely a fragment of text is to match this lexer, between 0.0 and 1.0.
func (r *RegexLexer) AnalyseText(text string) float32 {
	if r.analyser != nil {
		return r.analyser(text)
	}
	return 0
}
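
// Illustrative usage (assumed; the shebang heuristic and score are examples only):
//
//	lexer.SetAnalyser(func(text string) float32 {
//		if strings.HasPrefix(text, "#!") {
//			return 0.5
//		}
//		return 0.0
//	})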

// SetConfig replaces the Config for this Lexer.
func (r *RegexLexer) SetConfig(config *Config) *RegexLexer {
	r.config = config
	return r
}

// Config returns the Config for this Lexer.
func (r *RegexLexer) Config() *Config {
	return r.config
}

// Regex compilation is deferred until the lexer is used. This is to avoid significant init() time costs.
func (r *RegexLexer) maybeCompile() (err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.compiled {
		return nil
	}
	for state, rules := range r.rules {
		for i, rule := range rules {
			if rule.Regexp == nil {
				pattern := "(?:" + rule.Pattern + ")"
				if rule.flags != "" {
					pattern = "(?" + rule.flags + ")" + pattern
				}
				pattern = `\G` + pattern
				rule.Regexp, err = regexp2.Compile(pattern, 0)
				if err != nil {
					return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
				}
				rule.Regexp.MatchTimeout = time.Millisecond * 250
			}
		}
	}
restart:
	seen := map[LexerMutator]bool{}
	for state := range r.rules {
		for i := 0; i < len(r.rules[state]); i++ {
			rule := r.rules[state][i]
			if mutator, ok := rule.Mutator.(LexerMutator); ok {
				if seen[mutator] {
					return fmt.Errorf("saw mutator %T twice; this should not happen", mutator)
				}
				seen[mutator] = true
				if err := mutator.MutateLexer(r.rules, state, i); err != nil {
					return err
				}
				// Process the rules again in case the mutator added/removed rules.
				//
				// This sounds bad, but shouldn't be significant in practice.
				goto restart
			}
		}
	}
	r.compiled = true
	return nil
}

func (r *RegexLexer) fetchRules() error {
	rules, err := r.fetchRulesFunc()
	if err != nil {
		return fmt.Errorf("%s: failed to compile rules: %w", r.config.Name, err)
	}
	if ,  := ["root"]; ! {
		return fmt.Errorf("no \"root\" state")
	}
	compiledRules := map[string][]*CompiledRule{}
	for state, stateRules := range rules {
		compiledRules[state] = nil
		for _, rule := range stateRules {
			flags := ""
			if !r.config.NotMultiline {
				flags += "m"
			}
			if r.config.CaseInsensitive {
				flags += "i"
			}
			if r.config.DotAll {
				flags += "s"
			}
			compiledRules[state] = append(compiledRules[state], &CompiledRule{Rule: rule, flags: flags})
		}
	}

	r.rawRules = rules
	r.rules = compiledRules
	return nil
}

func (r *RegexLexer) needRules() error {
	var err error
	if r.fetchRulesFunc != nil {
		r.compileOnce.Do(func() {
			err = r.fetchRules()
		})
	}
	if err := r.maybeCompile(); err != nil {
		return err
	}
	return err
}

// Tokenise text using lexer, returning an iterator.
func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) {
	err := r.needRules()
	if err != nil {
		return nil, err
	}
	if options == nil {
		options = defaultOptions
	}
	if options.EnsureLF {
		text = ensureLF(text)
	}
	newlineAdded := false
	if !options.Nested && r.config.EnsureNL && !strings.HasSuffix(text, "\n") {
		text += "\n"
		newlineAdded = true
	}
	state := &LexerState{
		Registry:       r.registry,
		newlineAdded:   newlineAdded,
		options:        options,
		Lexer:          r,
		Text:           []rune(text),
		Stack:          []string{options.State},
		Rules:          r.rules,
		MutatorContext: map[interface{}]interface{}{},
	}
	return state.Iterator, nil
}
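
// Illustrative usage (assumed; "lexer" and "source" are placeholders): passing nil
// options selects defaultOptions, which starts lexing in the "root" state.
//
//	it, err := lexer.Tokenise(nil, source)
//	if err != nil {
//		// handle error
//	}
//	for t := it(); t != EOF; t = it() {
//		fmt.Print(t.Value)
//	}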

// MustRules is like Rules() but will panic on error.
func (r *RegexLexer) MustRules() Rules {
	rules, err := r.Rules()
	if err != nil {
		panic(err)
	}
	return rules
}

func matchRules(text []rune, pos int, rules []*CompiledRule) (int, *CompiledRule, []string, map[string]string) {
	for i, rule := range rules {
		match, err := rule.Regexp.FindRunesMatchStartingAt(text, pos)
		if match != nil && err == nil && match.Index == pos {
			groups := []string{}
			namedGroups := make(map[string]string)
			for _, g := range match.Groups() {
				namedGroups[g.Name] = g.String()
				groups = append(groups, g.String())
			}
			return i, rule, groups, namedGroups
		}
	}
	return 0, &CompiledRule{}, nil, nil
}

// ensureLF replaces \r and \r\n with \n.
// It is equivalent to chained strings.ReplaceAll calls, but more efficient.
func ensureLF(text string) string {
	out := make([]byte, len(text))
	var j int
	for i := 0; i < len(text); i++ {
		c := text[i]
		if c == '\r' {
			if i < len(text)-1 && text[i+1] == '\n' {
				continue
			}
			c = '\n'
		}
		out[j] = c
		j++
	}
	return string(out[:j])
}