// package meg implements Regular Expressions for multiaddr Components. It's short for "Megular Expressions"
package meg // The developer is assumed to be familiar with the Thompson NFA approach to // regex before making changes to this file. Refer to // https://swtch.com/~rsc/regexp/regexp1.html for an introduction. import ( ) type stateKind = int const ( matchAny stateKind = (iota * -1) - 1 // done MUST be the last stateKind in this list. // Anything that is less than done is a split index done ) // MatchState is the Thompson NFA for a regular expression. type MatchState struct { capture CaptureFunc // next is is the index of the next state. in the MatchState array. next int // If codeOrKind is negative, it is a kind. // If it is negative, and less than `done`, then it is the index to the next split. // This is done to keep the `MatchState` struct small and cache friendly. codeOrKind int } type CaptureFunc func(Matchable) error // capture is a linked list of capture funcs with values. type capture struct { f CaptureFunc v Matchable prev *capture } type statesAndCaptures struct { states []int captures []*capture } func ( MatchState) () string { if .codeOrKind == done { return "done" } if .codeOrKind < done { return fmt.Sprintf("split{left: %d, right: %d}", .next, decodeSplitIdx(.codeOrKind)) } return fmt.Sprintf("match{code: %d, next: %d}", .codeOrKind, .next) } // Matchable is an interface for any thing that can be matched against by this // package. In the future, we may use multiaddr.Component types directly. type Matchable interface { Code() int // Value() returns the string representation of the matchable. Value() string // RawValue() returns the byte representation of the Value RawValue() []byte // Bytes() returns the underlying bytes of the matchable. For multiaddr // Components, this includes the protocol code and possibly the varint // encoded size. Bytes() []byte } // ListOfMatchable is anything list-like that contains Matchable items. // This allows us to convert a slice of []T as a []Matchable when *T implements // Matchable. In the future, this may not be required if Go generics allows us // to say S ~[]T, and *T implements Matchable. This may also not be required if // we move this out of its own package and depend on Multiaddr and Components // directly. type ListOfMatchable interface { Get(i int) Matchable Len() int } // Match returns whether the given Components match the Matcher // // Errors are used to communicate capture errors. // If the error is non-nil the returned bool will be false. // // Components must be a ListOfMatchable to allow us to use a slice of []T as a // []Matchable when *T implements Matchable. func [ ListOfMatchable]( Matcher, ) (bool, error) { := .states := .startIdx // Fast case for a small number of states (<128) // Avoids allocation of a slice for the visitedBitSet. := [2]uint64{} := [:] if len() > 128 { = make([]uint64, (len()+63)/64) } := statesAndCaptures{ states: make([]int, 0, 16), captures: make([]*capture, 0, 16), } := statesAndCaptures{ states: make([]int, 0, 16), captures: make([]*capture, 0, 16), } = appendState(, , , nil, ) for := range .Len() { clear() if len(.states) == 0 { return false, nil } for , := range .states { := &[] := .Get() if .codeOrKind == matchAny || (.codeOrKind >= 0 && .codeOrKind == .Code()) { := .captures[] if .capture != nil { := &capture{ f: .capture, v: , } if == nil { = } else { .prev = = } .captures[] = } = appendState(, , .next, , ) } } , = , .states = .states[:0] .captures = .captures[:0] } for , := range .states { := &[] if .codeOrKind == done { // We found a complete path. Run the captures now := .captures[] // Flip the order of the captures because we see captures from right // to left, but users expect them left to right. type struct { CaptureFunc Matchable } := make([], 0, 16) for != nil { = append(, {.f, .v}) = .prev } for := len() - 1; >= 0; -- { if := [].([].); != nil { return false, } } return true, nil } } return false, nil } // appendState is a non-recursive way of appending states to statesAndCaptures. // If a state is a split, both branches are appended to statesAndCaptures. func appendState( statesAndCaptures, []MatchState, int, *capture, []uint64) statesAndCaptures { // Local struct to hold state index and the associated capture pointer. type struct { int *capture } // Initialize the stack with the starting task. := make([], 0, 16) = append(, {, }) // Process the stack until empty. for len() > 0 { // Pop the last element (LIFO order). := len() - 1 := [] = [:] // If the state index is out of bounds, skip. if . >= len() { continue } := &[.] // Check if this state has already been visited. if [./64]&(1<<(.%64)) != 0 { continue } // Mark the state as visited. [./64] |= 1 << (. % 64) // If it's a split state (the value is less than done) then push both branches. if .codeOrKind < done { // Get the second branch from the split. := decodeSplitIdx(.codeOrKind) // Check if the next branch is a `matchAny`. If it is, we want to // deprioritize it to allow for less greedy Any behavior. if [.next].codeOrKind == matchAny { // We want to process the non-matchAny first, so we push the s.next branch first = append(, {.next, .}) = append(, {, .}) } else { // To process s.next first, push the split branch first. = append(, {, .}) = append(, {.next, .}) } } else { // Otherwise, it's a valid final state -- append it. .states = append(.states, .) .captures = append(.captures, .) } } return } const splitIdxOffset = -(done - 1) func encodeSplitIdx( int) int { return ( + splitIdxOffset) * -1 } func decodeSplitIdx( int) int { return ( * -1) - splitIdxOffset }