// Package cascadia is an implementation of CSS selectors.
package cascadia

import (
	"errors"
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// parser is a hand-written recursive-descent parser for CSS selectors.
// It walks the source string byte by byte, tracking its position in i.
type parser struct {
	s string // the source text
	i int    // the current position (byte offset into s)

	// acceptPseudoElements controls pseudo-element handling: if false,
	// parsing a pseudo-element returns an error.
	acceptPseudoElements bool
}

// parseEscape parses a backslash escape starting at the current position.
//
// A backslash followed by one to six hex digits yields the rune with that
// code point; one trailing whitespace character (or a "\r\n" pair) after the
// digits is consumed as well. A backslash followed by any other byte except a
// line ending yields that byte literally. On success the position is advanced
// past the escape.
func (p *parser) parseEscape() (result string, err error) {
	if len(p.s) < p.i+2 || p.s[p.i] != '\\' {
		return "", errors.New("invalid escape sequence")
	}

	start := p.i + 1
	c := p.s[start]
	switch {
	case c == '\r' || c == '\n' || c == '\f':
		return "", errors.New("escaped line ending outside string")
	case hexDigit(c):
		// Unicode escape (hex): scan at most six hex digits.
		end := start
		for end < start+6 && end < len(p.s) && hexDigit(p.s[end]) {
			end++
		}
		// The error is deliberately ignored: the input is one to six
		// characters that were each just validated by hexDigit, so it always
		// parses and always fits in 64 bits.
		v, _ := strconv.ParseUint(p.s[start:end], 16, 64)

		// A single whitespace character terminating the escape is part of it.
		if len(p.s) > end {
			switch p.s[end] {
			case '\r':
				end++
				if len(p.s) > end && p.s[end] == '\n' {
					end++
				}
			case ' ', '\t', '\n', '\f':
				end++
			}
		}
		p.i = end
		return string(rune(v)), nil
	}

	// Return the literal character after the backslash.
	result = p.s[start : start+1]
	p.i += 2
	return result, nil
}

// toLowerASCII returns s with all ASCII capital letters lowercased.
// Non-ASCII bytes are left untouched, and s itself is returned (no copy)
// when it contains no ASCII capitals.
func toLowerASCII(s string) string {
	var b []byte
	for i := 0; i < len(s); i++ {
		if c := s[i]; 'A' <= c && c <= 'Z' {
			if b == nil {
				// Copy lazily, only once the first capital is seen.
				b = make([]byte, len(s))
				copy(b, s)
			}
			b[i] = c + ('a' - 'A')
		}
	}

	if b == nil {
		return s
	}
	return string(b)
}

// hexDigit reports whether c is an ASCII hexadecimal digit.
func hexDigit(c byte) bool {
	return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
}

// nameStart returns whether c can be the first character of an identifier
// (not counting an initial hyphen, or an escape sequence).
func nameStart(c byte) bool {
	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127
}

// nameChar returns whether c can be a character within an identifier
// (not counting an escape sequence).
func nameChar(c byte) bool {
	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 ||
		c == '-' || '0' <= c && c <= '9'
}

// parseIdentifier parses an identifier.
//
// Any number of leading hyphens is accepted and preserved in the result; the
// first character after them must satisfy nameStart or begin an escape.
func (p *parser) parseIdentifier() (result string, err error) {
	const prefix = '-'
	var numPrefix int

	for len(p.s) > p.i && p.s[p.i] == prefix {
		p.i++
		numPrefix++
	}

	if len(p.s) <= p.i {
		return "", errors.New("expected identifier, found EOF instead")
	}

	if c := p.s[p.i]; !(nameStart(c) || c == '\\') {
		return "", fmt.Errorf("expected identifier, found %c instead", c)
	}

	result, err = p.parseName()
	if numPrefix > 0 && err == nil {
		result = strings.Repeat(string(prefix), numPrefix) + result
	}
	return result, err
}

// parseName parses a name (which is like an identifier, but doesn't have
// extra restrictions on the first character).
//
// The position is only advanced past the name when a non-empty name was read.
func (p *parser) parseName() (result string, err error) {
	i := p.i
loop:
	for i < len(p.s) {
		c := p.s[i]
		switch {
		case nameChar(c):
			// Take the whole run of plain name characters at once.
			start := i
			for i < len(p.s) && nameChar(p.s[i]) {
				i++
			}
			result += p.s[start:i]
		case c == '\\':
			// parseEscape works from p.i, so sync the position around it.
			p.i = i
			val, escErr := p.parseEscape()
			if escErr != nil {
				return "", escErr
			}
			i = p.i
			result += val
		default:
			break loop
		}
	}

	if result == "" {
		return "", errors.New("expected name, found EOF instead")
	}

	p.i = i
	return result, nil
}

// parseString parses a single- or double-quoted string.
//
// The byte at the current position is taken as the quote character. The
// returned value has the quotes removed and escapes resolved; a backslash
// immediately followed by a line ending is a line continuation and
// contributes nothing to the result.
func (p *parser) parseString() (result string, err error) {
	i := p.i
	if len(p.s) < i+2 {
		return "", errors.New("expected string, found EOF instead")
	}

	quote := p.s[i]
	i++

loop:
	for i < len(p.s) {
		switch p.s[i] {
		case '\\':
			if len(p.s) > i+1 {
				// Escaped line endings are skipped entirely.
				switch p.s[i+1] {
				case '\r':
					if len(p.s) > i+2 && p.s[i+2] == '\n' {
						i += 3
						continue loop
					}
					fallthrough
				case '\n', '\f':
					i += 2
					continue loop
				}
			}
			p.i = i
			val, escErr := p.parseEscape()
			if escErr != nil {
				return "", escErr
			}
			i = p.i
			result += val
		case quote:
			break loop
		case '\r', '\n', '\f':
			return "", errors.New("unexpected end of line in string")
		default:
			// Take the whole run of ordinary characters at once.
			start := i
			for i < len(p.s) {
				if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' {
					break
				}
				i++
			}
			result += p.s[start:i]
		}
	}

	if i >= len(p.s) {
		return "", errors.New("EOF in string")
	}

	// Consume the final quote.
	i++

	p.i = i
	return result, nil
}

// parseRegex parses a regular expression; the end is defined by encountering an
// unmatched closing ')' or ']' which is not consumed.
//
// The position is advanced to that closing character even when the expression
// fails to compile; the compile error is returned in that case.
func (p *parser) parseRegex() (rx *regexp.Regexp, err error) {
	i := p.i
	if len(p.s) < i+2 {
		return nil, errors.New("expected regular expression, found EOF instead")
	}

	// Number of open parens or brackets;
	// when it becomes negative, we have finished parsing the regex.
	open := 0

loop:
	for i < len(p.s) {
		switch p.s[i] {
		case '(', '[':
			open++
		case ')', ']':
			open--
			if open < 0 {
				break loop
			}
		}
		i++
	}

	if i >= len(p.s) {
		return nil, errors.New("EOF in regular expression")
	}
	rx, err = regexp.Compile(p.s[p.i:i])
	p.i = i
	return rx, err
}

// skipWhitespace consumes whitespace characters and comments.
// It returns true if there was actually anything to skip.
//
// A "/*" without a matching "*/" is not treated as a comment and stops the
// scan.
func (p *parser) skipWhitespace() bool {
	i := p.i
	for i < len(p.s) {
		switch p.s[i] {
		case ' ', '\t', '\r', '\n', '\f':
			i++
			continue
		case '/':
			if strings.HasPrefix(p.s[i:], "/*") {
				end := strings.Index(p.s[i+len("/*"):], "*/")
				if end != -1 {
					// Skip the opener, the comment text and the closer.
					i += end + len("/**/")
					continue
				}
			}
		}
		break
	}

	if i > p.i {
		p.i = i
		return true
	}
	return false
}

// consumeParenthesis consumes an opening parenthesis and any following
// whitespace. It returns true if there was actually a parenthesis to skip.
func (p *parser) consumeParenthesis() bool {
	if p.i < len(p.s) && p.s[p.i] == '(' {
		p.i++
		p.skipWhitespace()
		return true
	}
	return false
}

// consumeClosingParenthesis consumes a closing parenthesis and any preceding
// whitespace. It returns true if there was actually a parenthesis to skip.
// When there is none, the position is restored to where it was on entry.
func (p *parser) consumeClosingParenthesis() bool {
	saved := p.i
	p.skipWhitespace()
	if p.i < len(p.s) && p.s[p.i] == ')' {
		p.i++
		return true
	}
	p.i = saved
	return false
}

// parseTypeSelector parses a type selector (one that matches by tag name).
// The tag name is lowercased (ASCII only).
func (p *parser) parseTypeSelector() (result tagSelector, err error) {
	tag, err := p.parseIdentifier()
	if err != nil {
		return tagSelector{}, err
	}
	return tagSelector{tag: toLowerASCII(tag)}, nil
}

// parseIDSelector parses a selector that matches by id attribute.
func (p *parser) parseIDSelector() (idSelector, error) {
	if p.i >= len(p.s) {
		return idSelector{}, errors.New("expected id selector (#id), found EOF instead")
	}
	if p.s[p.i] != '#' {
		return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
	}

	p.i++
	// An id is parsed as a name, not an identifier.
	id, err := p.parseName()
	if err != nil {
		return idSelector{}, err
	}

	return idSelector{id: id}, nil
}

// parseClassSelector parses a selector that matches by class attribute.
func (p *parser) parseClassSelector() (classSelector, error) {
	if p.i >= len(p.s) {
		return classSelector{}, errors.New("expected class selector (.class), found EOF instead")
	}
	if p.s[p.i] != '.' {
		return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
	}

	p.i++
	class, err := p.parseIdentifier()
	if err != nil {
		return classSelector{}, err
	}

	return classSelector{class: class}, nil
}

// parseAttributeSelector parses a selector that matches by attribute value.
//
// Accepted forms are [key] and [key OP value], optionally followed by an
// 'i'/'I' flag requesting case-insensitive matching. OP is one of
// = != ~= |= ^= $= *= #=, where "#=" takes a regular expression instead of a
// string or identifier. The attribute key is lowercased (ASCII only).
func (p *parser) parseAttributeSelector() (attrSelector, error) {
	if p.i >= len(p.s) {
		return attrSelector{}, errors.New("expected attribute selector ([attribute]), found EOF instead")
	}
	if p.s[p.i] != '[' {
		return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
	}

	p.i++
	p.skipWhitespace()
	key, err := p.parseIdentifier()
	if err != nil {
		return attrSelector{}, err
	}
	key = toLowerASCII(key)

	p.skipWhitespace()
	if p.i >= len(p.s) {
		return attrSelector{}, errors.New("unexpected EOF in attribute selector")
	}

	// Bare presence test: [key].
	if p.s[p.i] == ']' {
		p.i++
		return attrSelector{key: key, operation: ""}, nil
	}

	if p.i+2 >= len(p.s) {
		return attrSelector{}, errors.New("unexpected EOF in attribute selector")
	}

	// The operator is either "=" alone or a two-byte operator ending in '='.
	op := p.s[p.i : p.i+2]
	if op[0] == '=' {
		op = "="
	} else if op[1] != '=' {
		return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
	}
	p.i += len(op)

	p.skipWhitespace()
	if p.i >= len(p.s) {
		return attrSelector{}, errors.New("unexpected EOF in attribute selector")
	}

	var (
		val string
		rx  *regexp.Regexp
	)
	if op == "#=" {
		rx, err = p.parseRegex()
	} else {
		switch p.s[p.i] {
		case '\'', '"':
			val, err = p.parseString()
		default:
			val, err = p.parseIdentifier()
		}
	}
	if err != nil {
		return attrSelector{}, err
	}

	p.skipWhitespace()
	if p.i >= len(p.s) {
		return attrSelector{}, errors.New("unexpected EOF in attribute selector")
	}

	// Check whether the attribute carries an ignore-case flag.
	ignoreCase := false
	if p.s[p.i] == 'i' || p.s[p.i] == 'I' {
		ignoreCase = true
		p.i++
	}

	p.skipWhitespace()
	if p.i >= len(p.s) {
		return attrSelector{}, errors.New("unexpected EOF in attribute selector")
	}

	if p.s[p.i] != ']' {
		return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
	}
	p.i++

	switch op {
	case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
		return attrSelector{key: key, val: val, operation: op, regexp: rx, insensitive: ignoreCase}, nil
	default:
		return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
	}
}

var (
	errExpectedParenthesis        = errors.New("expected '(' but didn't find it")
	errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
	errUnmatchedParenthesis       = errors.New("unmatched '('")
)

// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element.
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
// https://drafts.csswg.org/selectors-3/#pseudo-elements
// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element;
// its lowercased name is then returned in pseudoElement.
func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) {
	if p.i >= len(p.s) {
		return nil, "", errors.New("expected pseudoclass selector (:pseudoclass), found EOF instead")
	}
	if p.s[p.i] != ':' {
		return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found '%c' instead", p.s[p.i])
	}

	p.i++
	var mustBePseudoElement bool
	if p.i >= len(p.s) {
		return nil, "", errors.New("got empty pseudoclass (or pseudoelement)")
	}
	if p.s[p.i] == ':' { // we found a pseudo-element
		mustBePseudoElement = true
		p.i++
	}

	name, err := p.parseIdentifier()
	if err != nil {
		return nil, "", err
	}
	name = toLowerASCII(name)
	if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" &&
		name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" &&
		name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") {
		return nil, "", fmt.Errorf("unknown pseudoelement :%s", name)
	}

	switch name {
	case "not", "has", "haschild":
		if !p.consumeParenthesis() {
			return nil, "", errExpectedParenthesis
		}
		sel, parseErr := p.parseSelectorGroup()
		if parseErr != nil {
			return nil, "", parseErr
		}
		if !p.consumeClosingParenthesis() {
			return nil, "", errExpectedClosingParenthesis
		}

		out = relativePseudoClassSelector{name: name, match: sel}

	case "contains", "containsown":
		if !p.consumeParenthesis() {
			return nil, "", errExpectedParenthesis
		}
		if p.i == len(p.s) {
			return nil, "", errUnmatchedParenthesis
		}
		var val string
		switch p.s[p.i] {
		case '\'', '"':
			val, err = p.parseString()
		default:
			val, err = p.parseIdentifier()
		}
		if err != nil {
			return nil, "", err
		}
		val = strings.ToLower(val)
		p.skipWhitespace()
		if p.i >= len(p.s) {
			return nil, "", errors.New("unexpected EOF in pseudo selector")
		}
		if !p.consumeClosingParenthesis() {
			return nil, "", errExpectedClosingParenthesis
		}

		out = containsPseudoClassSelector{own: name == "containsown", value: val}

	case "matches", "matchesown":
		if !p.consumeParenthesis() {
			return nil, "", errExpectedParenthesis
		}
		rx, rxErr := p.parseRegex()
		if rxErr != nil {
			return nil, "", rxErr
		}
		if p.i >= len(p.s) {
			return nil, "", errors.New("unexpected EOF in pseudo selector")
		}
		if !p.consumeClosingParenthesis() {
			return nil, "", errExpectedClosingParenthesis
		}

		out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}

	case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
		if !p.consumeParenthesis() {
			return nil, "", errExpectedParenthesis
		}
		a, b, nthErr := p.parseNth()
		if nthErr != nil {
			return nil, "", nthErr
		}
		if !p.consumeClosingParenthesis() {
			return nil, "", errExpectedClosingParenthesis
		}
		last := name == "nth-last-child" || name == "nth-last-of-type"
		ofType := name == "nth-of-type" || name == "nth-last-of-type"
		out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType}

	case "first-child":
		out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false}
	case "last-child":
		out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true}
	case "first-of-type":
		out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false}
	case "last-of-type":
		out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true}
	case "only-child":
		out = onlyChildPseudoClassSelector{ofType: false}
	case "only-of-type":
		out = onlyChildPseudoClassSelector{ofType: true}
	case "input":
		out = inputPseudoClassSelector{}
	case "empty":
		out = emptyElementPseudoClassSelector{}
	case "root":
		out = rootPseudoClassSelector{}
	case "link":
		out = linkPseudoClassSelector{}

	case "lang":
		if !p.consumeParenthesis() {
			return nil, "", errExpectedParenthesis
		}
		if p.i == len(p.s) {
			return nil, "", errUnmatchedParenthesis
		}
		val, langErr := p.parseIdentifier()
		if langErr != nil {
			return nil, "", langErr
		}
		val = strings.ToLower(val)
		p.skipWhitespace()
		if p.i >= len(p.s) {
			return nil, "", errors.New("unexpected EOF in pseudo selector")
		}
		if !p.consumeClosingParenthesis() {
			return nil, "", errExpectedClosingParenthesis
		}

		out = langPseudoClassSelector{lang: val}

	case "enabled":
		out = enabledPseudoClassSelector{}
	case "disabled":
		out = disabledPseudoClassSelector{}
	case "checked":
		out = checkedPseudoClassSelector{}
	case "visited", "hover", "active", "focus", "target":
		// Not applicable in a static context: never match.
		out = neverMatchSelector{value: ":" + name}
	case "after", "backdrop", "before", "cue", "first-letter", "first-line",
		"grammar-error", "marker", "placeholder", "selection", "spelling-error":
		// Pseudo-element: signalled to the caller by the nil Sel.
		return nil, name, nil
	default:
		return nil, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
	}
	return out, "", nil
}

// parseInteger parses a decimal integer (one or more ASCII digits, no sign).
func (p *parser) parseInteger() (int, error) {
	start := p.i
	end := start
	for end < len(p.s) && '0' <= p.s[end] && p.s[end] <= '9' {
		end++
	}
	if end == start {
		return 0, errors.New("expected integer, but didn't find it")
	}
	p.i = end

	val, err := strconv.Atoi(p.s[start:end])
	if err != nil {
		return 0, err
	}
	return val, nil
}

// parseNth parses the argument for :nth-child (normally of the form an+b).
//
// The keywords "odd" and "even" are accepted (case-insensitively) and yield
// (2, 1) and (2, 0) respectively. A lone integer is returned as b with a == 0.
// The function is written as a small state machine; each label below is one
// state.
func (p *parser) parseNth() (a, b int, err error) {
	// initial state
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '-':
		p.i++
		goto negativeA
	case '+':
		p.i++
		goto positiveA
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		goto positiveA
	case 'n', 'N':
		a = 1
		p.i++
		goto readN
	case 'o', 'O', 'e', 'E':
		id, nameErr := p.parseName()
		if nameErr != nil {
			return 0, 0, nameErr
		}
		id = toLowerASCII(id)
		if id == "odd" {
			return 2, 1, nil
		}
		if id == "even" {
			return 2, 0, nil
		}
		return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id)
	default:
		goto invalid
	}

positiveA:
	// An optional '+' has been consumed; expect digits or a bare 'n'.
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		a, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		goto readA
	case 'n', 'N':
		a = 1
		p.i++
		goto readN
	default:
		goto invalid
	}

negativeA:
	// A '-' has been consumed; expect digits or a bare 'n'.
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		a, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		a = -a
		goto readA
	case 'n', 'N':
		a = -1
		p.i++
		goto readN
	default:
		goto invalid
	}

readA:
	// The coefficient has been read; an 'n' must follow for it to be a.
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case 'n', 'N':
		p.i++
		goto readN
	default:
		// The number we read as a is actually b.
		return 0, a, nil
	}

readN:
	// The 'n' has been read; an optional "+b" or "-b" may follow.
	p.skipWhitespace()
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '+':
		p.i++
		p.skipWhitespace()
		b, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		return a, b, nil
	case '-':
		p.i++
		p.skipWhitespace()
		b, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		return a, -b, nil
	default:
		return a, 0, nil
	}

eof:
	return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b")

invalid:
	return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b")
}

// parseSimpleSelectorSequence parses a selector sequence that applies to
// a single element.
//
// A single simple selector without a pseudo-element is returned as-is; any
// other result is wrapped in a compoundSelector.
func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
	var selectors []Sel

	if p.i >= len(p.s) {
		return nil, errors.New("expected selector, found EOF instead")
	}

	switch p.s[p.i] {
	case '*':
		// It's the universal selector. Just skip over it, since it doesn't affect the meaning.
		p.i++
		// Other version of the universal selector: "*|*". HasPrefix also
		// matches when "|*" is the very end of the input, which a manual
		// "p.i+2 < len(p.s)" bounds check would wrongly reject.
		if strings.HasPrefix(p.s[p.i:], "|*") {
			p.i += 2
		}
	case '#', '.', '[', ':':
		// There's no type selector. Wait to process the other till the main loop.
	default:
		r, err := p.parseTypeSelector()
		if err != nil {
			return nil, err
		}
		selectors = append(selectors, r)
	}

	var pseudoElement string
loop:
	for p.i < len(p.s) {
		var (
			ns               Sel
			newPseudoElement string
			err              error
		)
		switch p.s[p.i] {
		case '#':
			ns, err = p.parseIDSelector()
		case '.':
			ns, err = p.parseClassSelector()
		case '[':
			ns, err = p.parseAttributeSelector()
		case ':':
			ns, newPseudoElement, err = p.parsePseudoclassSelector()
		default:
			break loop
		}
		if err != nil {
			return nil, err
		}
		// From https://drafts.csswg.org/selectors-3/#pseudo-elements :
		// "Only one pseudo-element may appear per selector, and if present
		// it must appear after the sequence of simple selectors that
		// represents the subjects of the selector."
		if ns == nil { // we found a pseudo-element
			if pseudoElement != "" {
				return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement)
			}
			if !p.acceptPseudoElements {
				return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement)
			}
			pseudoElement = newPseudoElement
		} else {
			if pseudoElement != "" {
				return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement)
			}
			selectors = append(selectors, ns)
		}
	}

	if len(selectors) == 1 && pseudoElement == "" {
		// No need to wrap a single selector in a compoundSelector.
		return selectors[0], nil
	}
	return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil
}

// parseSelector parses a selector that may include combinators.
//
// Whitespace between two sequences is the descendant combinator (' ');
// '+', '>' and '~' are taken as explicit combinators. Parsing stops at end of
// input, at ',' or ')', or at any other byte not preceded by a combinator.
func (p *parser) parseSelector() (Sel, error) {
	p.skipWhitespace()
	result, err := p.parseSimpleSelectorSequence()
	if err != nil {
		return nil, err
	}

	for {
		var combinator byte
		if p.skipWhitespace() {
			combinator = ' '
		}
		if p.i >= len(p.s) {
			return result, nil
		}

		switch p.s[p.i] {
		case '+', '>', '~':
			// An explicit combinator overrides the whitespace one.
			combinator = p.s[p.i]
			p.i++
			p.skipWhitespace()
		case ',', ')':
			// These characters can't begin a selector, but they can legally occur after one.
			return result, nil
		}

		if combinator == 0 {
			return result, nil
		}

		next, err := p.parseSimpleSelectorSequence()
		if err != nil {
			return nil, err
		}
		result = combinedSelector{first: result, combinator: combinator, second: next}
	}
}

// parseSelectorGroup parses a group of selectors, separated by commas.
func (p *parser) parseSelectorGroup() (SelectorGroup, error) {
	current, err := p.parseSelector()
	if err != nil {
		return nil, err
	}
	result := SelectorGroup{current}

	for p.i < len(p.s) && p.s[p.i] == ',' {
		p.i++
		c, err := p.parseSelector()
		if err != nil {
			return nil, err
		}
		result = append(result, c)
	}
	return result, nil
}