// Copyright 2024 Garrett D'Amore
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package encoding

import (
	"sync"
	"unicode/utf8"

	"golang.org/x/text/encoding"
	"golang.org/x/text/transform"
)

const (
	// RuneError is an alias for the UTF-8 replacement rune, '\uFFFD'.
	// In a Charmap.Map it marks a byte value that is invalid for the
	// character set.
	RuneError = '\uFFFD'

	// RuneSelf is the rune below which UTF-8 and the Unicode values are
	// identical.  It is also the limit for ASCII.
	RuneSelf = 0x80

	// ASCIISub is the ASCII substitution character (SUB, 0x1A).  It is the
	// default replacement byte for character maps that are ASCII supersets
	// and leave ReplacementChar unset.
	ASCIISub = '\x1a'
)

// Charmap is a structure for setting up encodings for 8-bit character sets,
// for transforming between UTF-8 and that other character set.  It has some
// ideas borrowed from golang.org/x/text/encoding/charmap, but it uses a
// different implementation.  This implementation uses maps, and supports
// user-defined maps.
//
// We do assume that a character map has a reasonable substitution character,
// and that valid encodings are stable (exactly a 1:1 map) and stateless
// (that is, there is no shift character or anything like that).  Hence this
// approach will not work for many East Asian character sets.
//
// Measurement shows little or no measurable difference in the performance of
// the two approaches.  The difference was down to a couple of nsec/op, and
// no consistent pattern as to which ran faster.  With the conversion to
// UTF-8 the code takes about 25 nsec/op.  The conversion in the reverse
// direction takes about 100 nsec/op.  (The larger cost for conversion
// from UTF-8 is most likely due to the need to convert the UTF-8 byte stream
// to a rune before conversion.)
type Charmap struct {
	transform.NopResetter
	// Internal lookup tables, populated by the one-time initialization
	// that Init triggers:
	//   - bytes maps a rune to the byte value that encodes it.
	//   - runes holds, for each of the 256 byte values, the UTF-8
	//     encoding of the rune that byte decodes to.
	//   - once guards that initialization so it runs a single time.
	bytes map[rune]byte
	runes [256][]byte
	once  sync.Once

	// The map between bytes and runes.  To indicate that a specific
	// byte value is invalid for a character set, use the rune
	// utf8.RuneError.  Values that are absent from this map will
	// be assumed to have the identity mapping -- that is the default
	// is to assume ISO8859-1, where all 8-bit characters have the same
	// numeric value as their Unicode runes.  (Not to be confused with
	// the UTF-8 values, which *will* be different for non-ASCII runes.)
	//
	// If no values less than RuneSelf are changed (or have non-identity
	// mappings), then the character set is assumed to be an ASCII
	// superset, and certain assumptions and optimizations become
	// available for ASCII bytes.
	Map map[byte]rune

	// The ReplacementChar is the byte value to use for substitution.
	// It should normally be ASCIISub for ASCII encodings.  This may be
	// unset (left to zero) for mappings that are strictly ASCII supersets.
	// In that case ASCIISub will be assumed instead.
	ReplacementChar byte
}

// cmapDecoder is the transformer that Charmap.NewDecoder wraps in an
// encoding.Decoder.  It converts bytes of the 8-bit character set to UTF-8.
type cmapDecoder struct {
	transform.NopResetter
	// runes is indexed by source byte value; each entry is the UTF-8
	// byte sequence emitted for that byte.
	runes [256][]byte
}

// cmapEncoder is the transformer that Charmap.NewEncoder wraps in an
// encoding.Encoder.  It converts UTF-8 to the 8-bit character set: bytes
// maps each representable rune to its byte value, and replace is the byte
// written for runes that have no entry in bytes.
type cmapEncoder struct {
	transform.NopResetter
	bytes   map[rune]byte
	replace byte
}

// Init initializes internal values of a character map.  This should
// be done early, to minimize the cost of allocation of transforms
// later.  It is not strictly necessary however, as the allocation
// functions will arrange to call it if it has not already been done.
func ( *Charmap) () {
	.once.Do(.initialize)
}

func ( *Charmap) () {
	.bytes = make(map[rune]byte)
	 := true

	for  := 0;  < 256; ++ {
		,  := .Map[byte()]
		if ! {
			 = rune()
		}
		if  < 128 &&  != rune() {
			 = false
		}
		if  != RuneError {
			.bytes[] = byte()
		}
		 := make([]byte, utf8.RuneLen())
		utf8.EncodeRune(, )
		.runes[] = 
	}
	if  && .ReplacementChar == '\x00' {
		.ReplacementChar = ASCIISub
	}
}

// NewDecoder returns a Decoder the converts from the 8-bit
// character set to UTF-8.  Unknown mappings, if any, are mapped
// to '\uFFFD'.
func ( *Charmap) () *encoding.Decoder {
	.Init()
	return &encoding.Decoder{Transformer: &cmapDecoder{runes: .runes}}
}

// NewEncoder returns a Transformer that converts from UTF8 to the
// 8-bit character set.  Unknown mappings are mapped to 0x1A.
func ( *Charmap) () *encoding.Encoder {
	.Init()
	return &encoding.Encoder{
		Transformer: &cmapEncoder{
			bytes:   .bytes,
			replace: .ReplacementChar,
		},
	}
}

func ( *cmapDecoder) (,  []byte,  bool) (int, int, error) {
	var  error
	var ,  int

	for ,  := range  {
		 := .runes[]
		 := len()

		if + > len() {
			 = transform.ErrShortDst
			break
		}
		for  := 0;  < ; ++ {
			[] = []
			++
		}
		++
	}
	return , , 
}

func ( *cmapEncoder) (,  []byte,  bool) (int, int, error) {
	var  error
	var ,  int
	for  < len() {
		if  >= len() {
			 = transform.ErrShortDst
			break
		}

		,  := utf8.DecodeRune([:])
		if  == utf8.RuneError &&  == 1 {
			// If its inconclusive due to insufficient data in
			// in the source, report it
			if  && !utf8.FullRune([:]) {
				 = transform.ErrShortSrc
				break
			}
		}

		if ,  := .bytes[];  {
			[] = 
		} else {
			[] = .replace
		}
		 += 
		++
	}

	return , , 
}