// package meg implements Regular Expressions for multiaddr Components. It's short for "Megular Expressions"
package meg// The developer is assumed to be familiar with the Thompson NFA approach to// regex before making changes to this file. Refer to// https://swtch.com/~rsc/regexp/regexp1.html for an introduction.import ()type stateKind = intconst ( matchAny stateKind = (iota * -1) - 1// done MUST be the last stateKind in this list. // Anything that is less than done is a split index done)// MatchState is the Thompson NFA for a regular expression.typeMatchStatestruct { capture CaptureFunc// next is is the index of the next state. in the MatchState array. next int// If codeOrKind is negative, it is a kind. // If it is negative, and less than `done`, then it is the index to the next split. // This is done to keep the `MatchState` struct small and cache friendly. codeOrKind int}typeCaptureFuncfunc(Matchable) error// capture is a linked list of capture funcs with values.type capture struct { f CaptureFunc v Matchable prev *capture}type statesAndCaptures struct { states []int captures []*capture}func ( MatchState) () string {if .codeOrKind == done {return"done" }if .codeOrKind < done {returnfmt.Sprintf("split{left: %d, right: %d}", .next, decodeSplitIdx(.codeOrKind)) }returnfmt.Sprintf("match{code: %d, next: %d}", .codeOrKind, .next)}// Matchable is an interface for any thing that can be matched against by this// package. In the future, we may use multiaddr.Component types directly.typeMatchableinterface {Code() int// Value() returns the string representation of the matchable.Value() string// RawValue() returns the byte representation of the ValueRawValue() []byte// Bytes() returns the underlying bytes of the matchable. For multiaddr // Components, this includes the protocol code and possibly the varint // encoded size.Bytes() []byte}// ListOfMatchable is anything list-like that contains Matchable items.// This allows us to convert a slice of []T as a []Matchable when *T implements// Matchable. In the future, this may not be required if Go generics allows us// to say S ~[]T, and *T implements Matchable. This may also not be required if// we move this out of its own package and depend on Multiaddr and Components// directly.typeListOfMatchableinterface {Get(i int) MatchableLen() int}// Match returns whether the given Components match the Matcher//// Errors are used to communicate capture errors.// If the error is non-nil the returned bool will be false.//// Components must be a ListOfMatchable to allow us to use a slice of []T as a// []Matchable when *T implements Matchable.func [ ListOfMatchable]( Matcher, ) (bool, error) { := .states := .startIdx// Fast case for a small number of states (<128) // Avoids allocation of a slice for the visitedBitSet. := [2]uint64{} := [:]iflen() > 128 { = make([]uint64, (len()+63)/64) } := statesAndCaptures{states: make([]int, 0, 16),captures: make([]*capture, 0, 16), } := statesAndCaptures{states: make([]int, 0, 16),captures: make([]*capture, 0, 16), } = appendState(, , , nil, )for := range .Len() {clear()iflen(.states) == 0 {returnfalse, nil }for , := range .states { := &[] := .Get()if .codeOrKind == matchAny || (.codeOrKind >= 0 && .codeOrKind == .Code()) { := .captures[]if .capture != nil { := &capture{f: .capture,v: , }if == nil { = } else { .prev = = } .captures[] = } = appendState(, , .next, , ) } } , = , .states = .states[:0] .captures = .captures[:0] }for , := range .states { := &[]if .codeOrKind == done {// We found a complete path. Run the captures now := .captures[]// Flip the order of the captures because we see captures from right // to left, but users expect them left to right.typestruct {CaptureFuncMatchable } := make([], 0, 16)for != nil { = append(, {.f, .v}) = .prev }for := len() - 1; >= 0; -- {if := [].([].); != nil {returnfalse, } }returntrue, nil } }returnfalse, nil}// appendState is a non-recursive way of appending states to statesAndCaptures.// If a state is a split, both branches are appended to statesAndCaptures.func appendState( statesAndCaptures, []MatchState, int, *capture, []uint64) statesAndCaptures {// Local struct to hold state index and the associated capture pointer.typestruct {int *capture }// Initialize the stack with the starting task. := make([], 0, 16) = append(, {, })// Process the stack until empty.forlen() > 0 {// Pop the last element (LIFO order). := len() - 1 := [] = [:]// If the state index is out of bounds, skip.if . >= len() {continue } := &[.]// Check if this state has already been visited.if [./64]&(1<<(.%64)) != 0 {continue }// Mark the state as visited. [./64] |= 1 << (. % 64)// If it's a split state (the value is less than done) then push both branches.if .codeOrKind < done {// Get the second branch from the split. := decodeSplitIdx(.codeOrKind)// Check if the next branch is a `matchAny`. If it is, we want to // deprioritize it to allow for less greedy Any behavior.if [.next].codeOrKind == matchAny {// We want to process the non-matchAny first, so we push the s.next branch first = append(, {.next, .}) = append(, {, .}) } else {// To process s.next first, push the split branch first. = append(, {, .}) = append(, {.next, .}) } } else {// Otherwise, it's a valid final state -- append it. .states = append(.states, .) .captures = append(.captures, .) } }return}const splitIdxOffset = -(done - 1)func encodeSplitIdx( int) int {return ( + splitIdxOffset) * -1}func decodeSplitIdx( int) int {return ( * -1) - splitIdxOffset}
The pages are generated with Goldsv0.8.2. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.