/*
Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
more full-featured regex engine behind the scenes.

It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
need to write very complex patterns or require compatibility with .NET.
*/
package regexp2import ()var (// DefaultMatchTimeout used when running regexp matches -- "forever"DefaultMatchTimeout = time.Duration(math.MaxInt64)// DefaultUnmarshalOptions used when unmarshaling a regex from textDefaultUnmarshalOptions = None)// Regexp is the representation of a compiled regular expression.// A Regexp is safe for concurrent use by multiple goroutines.typeRegexpstruct {// A match will time out if it takes (approximately) more than // MatchTimeout. This is a safety check in case the match // encounters catastrophic backtracking. The default value // (DefaultMatchTimeout) causes all time out checking to be // suppressed. MatchTimeout time.Duration// read-only after Compile pattern string// as passed to Compile options RegexOptions// options caps map[int]int// capnum->index capnames map[string]int//capture group name -> index capslist []string//sorted list of capture group names capsize int// size of the capture array code *syntax.Code// compiled program// cache of machines for running regexp muRun *sync.Mutex runner []*runner}// Compile parses a regular expression and returns, if successful,// a Regexp object that can be used to match against text.func ( string, RegexOptions) (*Regexp, error) {// parse it , := syntax.Parse(, syntax.RegexOptions())if != nil {returnnil, }// translate it to code , := syntax.Write()if != nil {returnnil, }// return itreturn &Regexp{pattern: ,options: ,caps: .Caps,capnames: .Capnames,capslist: .Caplist,capsize: .Capsize,code: ,MatchTimeout: DefaultMatchTimeout,muRun: &sync.Mutex{}, }, nil}// MustCompile is like Compile but panics if the expression cannot be parsed.// It simplifies safe initialization of global variables holding compiled regular// expressions.func ( string, RegexOptions) *Regexp { , := Compile(, )if != nil {panic(`regexp2: Compile(` + quote() + `): ` + .Error()) }return}// Escape adds backslashes to any special characters in the input stringfunc ( string) string {returnsyntax.Escape()}// Unescape removes 
any backslashes from previously-escaped special characters in the input stringfunc ( string) (string, error) {returnsyntax.Unescape()}// SetTimeoutPeriod is a debug function that sets the frequency of the timeout goroutine's sleep cycle.// Defaults to 100ms. The only benefit of setting this lower is that the 1 background goroutine that manages// timeouts may exit slightly sooner after all the timeouts have expired. See Github issue #63func ( time.Duration) {clockPeriod = }// StopTimeoutClock should only be used in unit tests to prevent the timeout clock goroutine// from appearing like a leaking goroutinefunc () {stopClock()}// String returns the source text used to compile the regular expression.func ( *Regexp) () string {return .pattern}func quote( string) string {ifstrconv.CanBackquote() {return"`" + + "`" }returnstrconv.Quote()}// RegexOptions impact the runtime and parsing behavior// for each specific regex. They are setable in code as well// as in the regex pattern itself.typeRegexOptionsint32const (NoneRegexOptions = 0x0IgnoreCase = 0x0001// "i"Multiline = 0x0002// "m"ExplicitCapture = 0x0004// "n"Compiled = 0x0008// "c"Singleline = 0x0010// "s"IgnorePatternWhitespace = 0x0020// "x"RightToLeft = 0x0040// "r"Debug = 0x0080// "d"ECMAScript = 0x0100// "e"RE2 = 0x0200// RE2 (regexp package) compatibility modeUnicode = 0x0400// "u")func ( *Regexp) () bool {return .options&RightToLeft != 0}func ( *Regexp) () bool {return .options&Debug != 0}// Replace searches the input string and replaces each match found with the replacement text.// Count will limit the number of matches attempted and startAt will allow// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).// Set startAt and count to -1 to go through the whole stringfunc ( *Regexp) (, string, , int) (string, error) { , := syntax.NewReplacerData(, .caps, .capsize, .capnames, syntax.RegexOptions(.options))if != nil {return"", }//TODO: cache 
ReplacerDatareturnreplace(, , nil, , , )}// ReplaceFunc searches the input string and replaces each match found using the string from the evaluator// Count will limit the number of matches attempted and startAt will allow// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).// Set startAt and count to -1 to go through the whole string.func ( *Regexp) ( string, MatchEvaluator, , int) (string, error) {returnreplace(, nil, , , , )}// FindStringMatch searches the input string for a Regexp matchfunc ( *Regexp) ( string) (*Match, error) {// convert string to runesreturn .run(false, -1, getRunes())}// FindRunesMatch searches the input rune slice for a Regexp matchfunc ( *Regexp) ( []rune) (*Match, error) {return .run(false, -1, )}// FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt indexfunc ( *Regexp) ( string, int) (*Match, error) {if > len() {returnnil, errors.New("startAt must be less than the length of the input string") } , := .getRunesAndStart(, )if == -1 {// we didn't find our start index in the string -- that's a problemreturnnil, errors.New("startAt must align to the start of a valid rune in the input string") }return .run(false, , )}// FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt indexfunc ( *Regexp) ( []rune, int) (*Match, error) {return .run(false, , )}// FindNextMatch returns the next match in the same input string as the match parameter.// Will return nil if there is no next match or if given a nil match.func ( *Regexp) ( *Match) (*Match, error) {if == nil {returnnil, nil }// If previous match was empty, advance by one before matching to prevent // infinite loop := .textposif .Length == 0 {if .textpos == len(.text) {returnnil, nil }if .RightToLeft() { -- } else { ++ } }return .run(false, , .text)}// MatchString return true if the string matches the regex// error will be set if a timeout occursfunc ( 
*Regexp) ( string) (bool, error) { , := .run(true, -1, getRunes())if != nil {returnfalse, }return != nil, nil}func ( *Regexp) ( string, int) ([]rune, int) {if < 0 {if .RightToLeft() { := getRunes()return , len() }returngetRunes(), 0 } := make([]rune, len()) := 0 := -1for , := range {if == { = } [] = ++ }if == len() { = }return [:], }func getRunes( string) []rune {return []rune()}// MatchRunes return true if the runes matches the regex// error will be set if a timeout occursfunc ( *Regexp) ( []rune) (bool, error) { , := .run(true, -1, )if != nil {returnfalse, }return != nil, nil}// GetGroupNames Returns the set of strings used to name capturing groups in the expression.func ( *Regexp) () []string {var []stringif .capslist == nil { = make([]string, .capsize)for := 0; < len(); ++ { [] = strconv.Itoa() } } else { = make([]string, len(.capslist))copy(, .capslist) }return}// GetGroupNumbers returns the integer group numbers corresponding to a group name.func ( *Regexp) () []int {var []intif .caps == nil { = make([]int, .capsize)for := 0; < len(); ++ { [] = } } else { = make([]int, len(.caps))for , := range .caps { [] = } }return}// GroupNameFromNumber retrieves a group name that corresponds to a group number.// It will return "" for and unknown group number. Unnamed groups automatically// receive a name that is the decimal string equivalent of its number.func ( *Regexp) ( int) string {if .capslist == nil {if >= 0 && < .capsize {returnstrconv.Itoa() }return"" }if .caps != nil {varboolif , = .caps[]; ! {return"" } }if >= 0 && < len(.capslist) {return .capslist[] }return""}// GroupNumberFromName returns a group number that corresponds to a group name.// Returns -1 if the name is not a recognized group name. 
Numbered groups// automatically get a group name that is the decimal string equivalent of its number.func ( *Regexp) ( string) int {// look up name if we have a hashtable of namesif .capnames != nil {if , := .capnames[]; {return }return -1 }// convert to an int if it looks like a number := 0for := 0; < len(); ++ { := []if > '9' || < '0' {return -1 } *= 10 += int( - '0') }// return int if it's in rangeif >= 0 && < .capsize {return }return -1}// MarshalText implements [encoding.TextMarshaler]. The output// matches that of calling the [Regexp.String] method.func ( *Regexp) () ([]byte, error) {return []byte(.String()), nil}// UnmarshalText implements [encoding.TextUnmarshaler] by calling// [Compile] on the encoded value.func ( *Regexp) ( []byte) error { , := Compile(string(), DefaultUnmarshalOptions)if != nil {return } * = *returnnil}
The pages are generated with Golds v0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.