/*
Package regexp2 is a regexp package that has an interface similar to Go's standard library regexp package but uses a
more full-featured regex engine behind the scenes.

It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
need to write very complex patterns or require compatibility with .NET.
*/
package regexp2 import ( ) var ( // DefaultMatchTimeout used when running regexp matches -- "forever" DefaultMatchTimeout = time.Duration(math.MaxInt64) // DefaultUnmarshalOptions used when unmarshaling a regex from text DefaultUnmarshalOptions = None ) // Regexp is the representation of a compiled regular expression. // A Regexp is safe for concurrent use by multiple goroutines. type Regexp struct { // A match will time out if it takes (approximately) more than // MatchTimeout. This is a safety check in case the match // encounters catastrophic backtracking. The default value // (DefaultMatchTimeout) causes all time out checking to be // suppressed. MatchTimeout time.Duration // read-only after Compile pattern string // as passed to Compile options RegexOptions // options caps map[int]int // capnum->index capnames map[string]int //capture group name -> index capslist []string //sorted list of capture group names capsize int // size of the capture array code *syntax.Code // compiled program // cache of machines for running regexp muRun *sync.Mutex runner []*runner } // Compile parses a regular expression and returns, if successful, // a Regexp object that can be used to match against text. func ( string, RegexOptions) (*Regexp, error) { // parse it , := syntax.Parse(, syntax.RegexOptions()) if != nil { return nil, } // translate it to code , := syntax.Write() if != nil { return nil, } // return it return &Regexp{ pattern: , options: , caps: .Caps, capnames: .Capnames, capslist: .Caplist, capsize: .Capsize, code: , MatchTimeout: DefaultMatchTimeout, muRun: &sync.Mutex{}, }, nil } // MustCompile is like Compile but panics if the expression cannot be parsed. // It simplifies safe initialization of global variables holding compiled regular // expressions. 
func ( string, RegexOptions) *Regexp { , := Compile(, ) if != nil { panic(`regexp2: Compile(` + quote() + `): ` + .Error()) } return } // Escape adds backslashes to any special characters in the input string func ( string) string { return syntax.Escape() } // Unescape removes any backslashes from previously-escaped special characters in the input string func ( string) (string, error) { return syntax.Unescape() } // SetTimeoutPeriod is a debug function that sets the frequency of the timeout goroutine's sleep cycle. // Defaults to 100ms. The only benefit of setting this lower is that the 1 background goroutine that manages // timeouts may exit slightly sooner after all the timeouts have expired. See Github issue #63 func ( time.Duration) { clockPeriod = } // StopTimeoutClock should only be used in unit tests to prevent the timeout clock goroutine // from appearing like a leaking goroutine func () { stopClock() } // String returns the source text used to compile the regular expression. func ( *Regexp) () string { return .pattern } func quote( string) string { if strconv.CanBackquote() { return "`" + + "`" } return strconv.Quote() } // RegexOptions impact the runtime and parsing behavior // for each specific regex. They are setable in code as well // as in the regex pattern itself. type RegexOptions int32 const ( None RegexOptions = 0x0 IgnoreCase = 0x0001 // "i" Multiline = 0x0002 // "m" ExplicitCapture = 0x0004 // "n" Compiled = 0x0008 // "c" Singleline = 0x0010 // "s" IgnorePatternWhitespace = 0x0020 // "x" RightToLeft = 0x0040 // "r" Debug = 0x0080 // "d" ECMAScript = 0x0100 // "e" RE2 = 0x0200 // RE2 (regexp package) compatibility mode Unicode = 0x0400 // "u" ) func ( *Regexp) () bool { return .options&RightToLeft != 0 } func ( *Regexp) () bool { return .options&Debug != 0 } // Replace searches the input string and replaces each match found with the replacement text. 
// Count will limit the number of matches attempted and startAt will allow // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option). // Set startAt and count to -1 to go through the whole string func ( *Regexp) (, string, , int) (string, error) { , := syntax.NewReplacerData(, .caps, .capsize, .capnames, syntax.RegexOptions(.options)) if != nil { return "", } //TODO: cache ReplacerData return replace(, , nil, , , ) } // ReplaceFunc searches the input string and replaces each match found using the string from the evaluator // Count will limit the number of matches attempted and startAt will allow // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option). // Set startAt and count to -1 to go through the whole string. func ( *Regexp) ( string, MatchEvaluator, , int) (string, error) { return replace(, nil, , , , ) } // FindStringMatch searches the input string for a Regexp match func ( *Regexp) ( string) (*Match, error) { // convert string to runes return .run(false, -1, getRunes()) } // FindRunesMatch searches the input rune slice for a Regexp match func ( *Regexp) ( []rune) (*Match, error) { return .run(false, -1, ) } // FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt index func ( *Regexp) ( string, int) (*Match, error) { if > len() { return nil, errors.New("startAt must be less than the length of the input string") } , := .getRunesAndStart(, ) if == -1 { // we didn't find our start index in the string -- that's a problem return nil, errors.New("startAt must align to the start of a valid rune in the input string") } return .run(false, , ) } // FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index func ( *Regexp) ( []rune, int) (*Match, error) { return .run(false, , ) } // FindNextMatch returns the next match in the same input string as the match parameter. 
// Will return nil if there is no next match or if given a nil match. func ( *Regexp) ( *Match) (*Match, error) { if == nil { return nil, nil } // If previous match was empty, advance by one before matching to prevent // infinite loop := .textpos if .Length == 0 { if .textpos == len(.text) { return nil, nil } if .RightToLeft() { -- } else { ++ } } return .run(false, , .text) } // MatchString return true if the string matches the regex // error will be set if a timeout occurs func ( *Regexp) ( string) (bool, error) { , := .run(true, -1, getRunes()) if != nil { return false, } return != nil, nil } func ( *Regexp) ( string, int) ([]rune, int) { if < 0 { if .RightToLeft() { := getRunes() return , len() } return getRunes(), 0 } := make([]rune, len()) := 0 := -1 for , := range { if == { = } [] = ++ } if == len() { = } return [:], } func getRunes( string) []rune { return []rune() } // MatchRunes return true if the runes matches the regex // error will be set if a timeout occurs func ( *Regexp) ( []rune) (bool, error) { , := .run(true, -1, ) if != nil { return false, } return != nil, nil } // GetGroupNames Returns the set of strings used to name capturing groups in the expression. func ( *Regexp) () []string { var []string if .capslist == nil { = make([]string, .capsize) for := 0; < len(); ++ { [] = strconv.Itoa() } } else { = make([]string, len(.capslist)) copy(, .capslist) } return } // GetGroupNumbers returns the integer group numbers corresponding to a group name. func ( *Regexp) () []int { var []int if .caps == nil { = make([]int, .capsize) for := 0; < len(); ++ { [] = } } else { = make([]int, len(.caps)) for , := range .caps { [] = } } return } // GroupNameFromNumber retrieves a group name that corresponds to a group number. // It will return "" for and unknown group number. Unnamed groups automatically // receive a name that is the decimal string equivalent of its number. 
func ( *Regexp) ( int) string { if .capslist == nil { if >= 0 && < .capsize { return strconv.Itoa() } return "" } if .caps != nil { var bool if , = .caps[]; ! { return "" } } if >= 0 && < len(.capslist) { return .capslist[] } return "" } // GroupNumberFromName returns a group number that corresponds to a group name. // Returns -1 if the name is not a recognized group name. Numbered groups // automatically get a group name that is the decimal string equivalent of its number. func ( *Regexp) ( string) int { // look up name if we have a hashtable of names if .capnames != nil { if , := .capnames[]; { return } return -1 } // convert to an int if it looks like a number := 0 for := 0; < len(); ++ { := [] if > '9' || < '0' { return -1 } *= 10 += int( - '0') } // return int if it's in range if >= 0 && < .capsize { return } return -1 } // MarshalText implements [encoding.TextMarshaler]. The output // matches that of calling the [Regexp.String] method. func ( *Regexp) () ([]byte, error) { return []byte(.String()), nil } // UnmarshalText implements [encoding.TextUnmarshaler] by calling // [Compile] on the encoded value. func ( *Regexp) ( []byte) error { , := Compile(string(), DefaultUnmarshalOptions) if != nil { return } * = * return nil }