package shlex
import (
"bufio"
"errors"
"io"
"strings"
"unicode"
)
var (
	// ErrNoClosing is returned when the input ends inside a quoted
	// string that was never closed.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is returned when the input ends immediately after
	// an escape character, leaving nothing to escape.
	ErrNoEscaped = errors.New("No escaped character")
)
// Tokenizer classifies runes for the Lexer. Implementations decide
// which runes may form words, which separate tokens, which open and
// close quoted strings, which start escape sequences, and inside which
// quote style escape sequences are honored.
type Tokenizer interface {
	// IsWord reports whether the rune may appear in an unquoted word.
	IsWord(rune) bool
	// IsWhitespace reports whether the rune separates tokens.
	IsWhitespace(rune) bool
	// IsQuote reports whether the rune opens/closes a quoted string.
	IsQuote(rune) bool
	// IsEscape reports whether the rune starts an escape sequence.
	IsEscape(rune) bool
	// IsEscapedQuote reports whether the rune is a quote style inside
	// which escape sequences are honored.
	IsEscapedQuote(rune) bool
}
// DefaultTokenizer implements Tokenizer with rules approximating POSIX
// shell lexing: words consist of letters, digits and underscores; both
// single and double quotes delimit strings; backslash is the escape
// character; and only double-quoted strings honor escape sequences.
type DefaultTokenizer struct{}

// IsWord reports whether r may appear inside an unquoted word.
func (t *DefaultTokenizer) IsWord(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r), unicode.IsNumber(r):
		return true
	default:
		return false
	}
}

// IsQuote reports whether r opens or closes a quoted string.
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	return r == '\'' || r == '"'
}

// IsWhitespace reports whether r separates tokens.
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	return unicode.IsSpace(r)
}

// IsEscape reports whether r starts an escape sequence.
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	return r == '\\'
}

// IsEscapedQuote reports whether r is a quote style whose contents
// honor escape sequences (double quotes only, as in POSIX shells).
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	return r == '"'
}
// Lexer splits an input stream into shell-like tokens.
type Lexer struct {
	reader          *bufio.Reader // buffered source of runes
	tokenizer       Tokenizer     // rune classification rules; DefaultTokenizer unless SetTokenizer was called
	posix           bool          // use POSIX-shell quoting/escaping semantics
	whitespacesplit bool          // split on whitespace only
}
// NewLexer returns a Lexer that tokenizes the stream r. When posix is
// true, quoting and escaping follow POSIX shell semantics; when
// whitespacesplit is true, tokens are delimited by whitespace only.
// The returned lexer uses a DefaultTokenizer; see SetTokenizer to
// override the classification rules.
func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
	lexer := &Lexer{
		reader:          bufio.NewReader(r),
		tokenizer:       &DefaultTokenizer{},
		posix:           posix,
		whitespacesplit: whitespacesplit,
	}
	return lexer
}
// NewLexerString is a convenience wrapper around NewLexer that reads
// tokens from the string s.
func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
	reader := strings.NewReader(s)
	return NewLexer(reader, posix, whitespacesplit)
}
// Split tokenizes s with whitespace splitting enabled and returns the
// resulting tokens. posix selects POSIX-shell quoting semantics.
func Split(s string, posix bool) ([]string, error) {
	lexer := NewLexerString(s, posix, true)
	return lexer.Split()
}
// SetTokenizer replaces the lexer's tokenizer, allowing callers to
// customize which runes count as words, whitespace, quotes and
// escapes. It affects all subsequent reads.
func (l *Lexer) SetTokenizer(t Tokenizer) {
	l.tokenizer = t
}
// Split consumes the lexer's entire input and returns all tokens in
// order; empty tokens are discarded. On a tokenization error (e.g. an
// unclosed quote) the tokens read so far are returned alongside the
// error. Reaching end of input is not an error: io.EOF is swallowed
// and a nil error is returned.
func (l *Lexer) Split() ([]string, error) {
	result := make([]string, 0)
	for {
		token, err := l.readToken()
		if token != "" {
			result = append(result, token)
		}
		// readToken propagates the reader's error verbatim today;
		// errors.Is keeps this robust should a wrapped EOF ever
		// surface (idiomatic since Go 1.13).
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return result, err
		}
	}
	return result, nil
}
// readToken reads runes until it has assembled one complete token and
// returns it. It is a small state machine; state holds one of:
//
//	' '          — between tokens (skipping whitespace)
//	'a'          — inside an unquoted word
//	a quote rune — inside a quoted string opened by that exact rune
//	escape rune  — just consumed an escape character
//
// escapedstate records the state to resume once the escaped rune has
// been consumed. When input is exhausted, the partial token is
// returned with io.EOF, or with ErrNoClosing / ErrNoEscaped if EOF
// arrived inside a quoted string or escape sequence. Note the token
// may be empty (e.g. consecutive whitespace); Split discards those.
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	quoted := false     // a quote was seen in this token (POSIX: '' is a valid empty token)
	state := ' '        // current scanner state, see above
	escapedstate := ' ' // state to restore after an escape completes
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// EOF (or a read failure) inside a quote or escape means
			// the construct was never closed.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}
		switch {
		case t.IsWhitespace(state):
			// Between tokens.
			switch {
			case t.IsWhitespace(next):
				break scanning
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				// Non-POSIX mode keeps the quote characters in the token.
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				// Punctuation: either start a whitespace-delimited token
				// or emit the rune as a token of its own.
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		case t.IsQuote(state):
			// Inside a quoted string; state is the opening quote rune.
			quoted = true
			switch {
			case next == state:
				// Closing quote. Non-POSIX keeps it and ends the token;
				// POSIX drops it and continues the surrounding word.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				// Escapes are honored only inside escaped-quote (") strings.
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		case t.IsEscape(state):
			// Rune immediately following an escape character.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				// Inside quotes, the escape only escapes itself and the
				// quote rune; otherwise it is kept literally.
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		case t.IsWord(state):
			// Inside an unquoted word.
			switch {
			case t.IsWhitespace(next):
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				token += string(next)
			default:
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					// Punctuation ends the word; push the rune back for
					// the next call. NOTE(review): UnreadRune's error is
					// ignored — it cannot fail directly after a
					// successful ReadRune.
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
The pages are generated with Golds v0.8.2 (GOOS=linux, GOARCH=amd64).
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.