commit 4e8927610aa7b55d37543ce391f9f09c50f9123d Author: Michael Fladischer Date: Tue Nov 13 19:13:59 2018 +0100 Import python-cssselect2_0.2.1.orig.tar.gz [dgit import orig python-cssselect2_0.2.1.orig.tar.gz] diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..e68cfbd --- /dev/null +++ b/.coveragerc @@ -0,0 +1,10 @@ +[run] +branch = True + +[report] +exclude_lines = + pragma: no cover + def __repr__ + raise NotImplementedError +omit = + .* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c5564c --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +*.pyc +*.egg-info +/.cache +/.coverage +/.eggs +/build +/dist +/env +/htmlcov +/profile diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cf10dd4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,30 @@ +language: python +sudo: false + +git: + submodules: false + +matrix: + include: + - os: linux + python: 2.7 + - os: linux + python: 3.3 + - os: linux + python: 3.4 + - os: linux + python: 3.5 + - os: linux + python: 3.6 + - os: osx + language: generic + env: PYTHON_VERSION=3 + +before_install: + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install python3; fi + +install: + - pip$PYTHON_VERSION install --upgrade -e.[test] + +script: + - python$PYTHON_VERSION setup.py test diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..95eacc2 --- /dev/null +++ b/CHANGES @@ -0,0 +1,29 @@ +cssselect2 changelog +==================== + + +Version 0.2.1 +------------- + +Released on 2017-10-02. + +* Fix documentation. + + +Version 0.2.0 +------------- + +Released on 2017-08-16. + +* Fix some selectors for HTML documents with no namespace. +* Don't crash when the attribute comparator is unknown. +* Don't crash when there are empty attribute classes. +* Follow semantic versioning. + + +Version 0.1 +----------- + +Released on 2017-07-07. + +* Initial release. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0a38c50 --- /dev/null +++ b/LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2012 - 2013 by Simon Sapin, 2017 by Guillaume Ayoub. + +Some rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * The names of the contributors may not be used to endorse or + promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..2675103 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.rst CHANGES LICENSE tox.ini .coveragerc +recursive-include docs * +prune docs/_build diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..177e95a --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,39 @@ +Metadata-Version: 1.0 +Name: cssselect2 +Version: 0.2.1 +Summary: CSS selectors for Python ElementTree +Home-page: http://packages.python.org/cssselect2/ +Author: Simon Sapin +Author-email: simon.sapin@exyr.org +License: BSD +Description-Content-Type: UNKNOWN +Description: cssselect2: CSS selectors for Python ElementTree + ################################################ + + cssselect2 is a straightforward implementation of `CSS3 Selectors`_ for markup + documents (HTML, XML, etc.) that can be read by `ElementTree`_-like parsers + (including cElementTree, lxml_, html5lib_, etc.) + + Unlike cssselect_, it does not translate selectors to XPath_ and therefore does + not have all the correctness corner cases that are hard or impossible to fix in + cssselect. + + .. _ElementTree: http://docs.python.org/3/library/xml.etree.elementtree.html + .. _CSS3 Selectors: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/ + .. _lxml: http://lxml.de/ + .. _html5lib: https://github.com/html5lib/html5lib-python + .. _cssselect: http://packages.python.org/cssselect/ + .. 
_XPath: http://www.w3.org/TR/xpath/ + + + Quick facts: + + * Free software: BSD licensed + * Compatible with Python 2.7+ and 3.3+ + * Latest documentation: http://cssselect2.readthedocs.io/ + * Source, issues and pull requests `on Github + `_ + * Releases `on PyPI `_ + * Install with ``pip install cssselect2`` + +Platform: UNKNOWN diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..0e34b10 --- /dev/null +++ b/README.rst @@ -0,0 +1,28 @@ +cssselect2: CSS selectors for Python ElementTree +################################################ + +cssselect2 is a straightforward implementation of `CSS3 Selectors`_ for markup +documents (HTML, XML, etc.) that can be read by `ElementTree`_-like parsers +(including cElementTree, lxml_, html5lib_, etc.) + +Unlike cssselect_, it does not translate selectors to XPath_ and therefore does +not have all the correctness corner cases that are hard or impossible to fix in +cssselect. + +.. _ElementTree: http://docs.python.org/3/library/xml.etree.elementtree.html +.. _CSS3 Selectors: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/ +.. _lxml: http://lxml.de/ +.. _html5lib: https://github.com/html5lib/html5lib-python +.. _cssselect: http://packages.python.org/cssselect/ +.. 
_XPath: http://www.w3.org/TR/xpath/ + + +Quick facts: + +* Free software: BSD licensed +* Compatible with Python 2.7+ and 3.3+ +* Latest documentation: http://cssselect2.readthedocs.io/ +* Source, issues and pull requests `on Github + `_ +* Releases `on PyPI `_ +* Install with ``pip install cssselect2`` diff --git a/cssselect2.egg-info/PKG-INFO b/cssselect2.egg-info/PKG-INFO new file mode 100644 index 0000000..177e95a --- /dev/null +++ b/cssselect2.egg-info/PKG-INFO @@ -0,0 +1,39 @@ +Metadata-Version: 1.0 +Name: cssselect2 +Version: 0.2.1 +Summary: CSS selectors for Python ElementTree +Home-page: http://packages.python.org/cssselect2/ +Author: Simon Sapin +Author-email: simon.sapin@exyr.org +License: BSD +Description-Content-Type: UNKNOWN +Description: cssselect2: CSS selectors for Python ElementTree + ################################################ + + cssselect2 is a straightforward implementation of `CSS3 Selectors`_ for markup + documents (HTML, XML, etc.) that can be read by `ElementTree`_-like parsers + (including cElementTree, lxml_, html5lib_, etc.) + + Unlike cssselect_, it does not translate selectors to XPath_ and therefore does + not have all the correctness corner cases that are hard or impossible to fix in + cssselect. + + .. _ElementTree: http://docs.python.org/3/library/xml.etree.elementtree.html + .. _CSS3 Selectors: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/ + .. _lxml: http://lxml.de/ + .. _html5lib: https://github.com/html5lib/html5lib-python + .. _cssselect: http://packages.python.org/cssselect/ + .. 
_XPath: http://www.w3.org/TR/xpath/ + + + Quick facts: + + * Free software: BSD licensed + * Compatible with Python 2.7+ and 3.3+ + * Latest documentation: http://cssselect2.readthedocs.io/ + * Source, issues and pull requests `on Github + `_ + * Releases `on PyPI `_ + * Install with ``pip install cssselect2`` + +Platform: UNKNOWN diff --git a/cssselect2.egg-info/SOURCES.txt b/cssselect2.egg-info/SOURCES.txt new file mode 100644 index 0000000..7628fa5 --- /dev/null +++ b/cssselect2.egg-info/SOURCES.txt @@ -0,0 +1,31 @@ +.coveragerc +.gitignore +.travis.yml +CHANGES +LICENSE +MANIFEST.in +README.rst +example.py +setup.cfg +setup.py +cssselect2/__init__.py +cssselect2/_compat.py +cssselect2/compiler.py +cssselect2/parser.py +cssselect2/tree.py +cssselect2.egg-info/PKG-INFO +cssselect2.egg-info/SOURCES.txt +cssselect2.egg-info/dependency_links.txt +cssselect2.egg-info/requires.txt +cssselect2.egg-info/top_level.txt +cssselect2/tests/LICENSE +cssselect2/tests/__init__.py +cssselect2/tests/content.xhtml +cssselect2/tests/ids.html +cssselect2/tests/invalid_selectors.json +cssselect2/tests/make_valid_selectors_json.sh +cssselect2/tests/shakespeare.html +cssselect2/tests/test_cssselect2.py +cssselect2/tests/valid_selectors.json +docs/conf.py +docs/index.rst \ No newline at end of file diff --git a/cssselect2.egg-info/dependency_links.txt b/cssselect2.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/cssselect2.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/cssselect2.egg-info/requires.txt b/cssselect2.egg-info/requires.txt new file mode 100644 index 0000000..0ac0ea0 --- /dev/null +++ b/cssselect2.egg-info/requires.txt @@ -0,0 +1,7 @@ +tinycss2 + +[test] +pytest-runner +pytest-cov +pytest-flake8 +pytest-isort diff --git a/cssselect2.egg-info/top_level.txt b/cssselect2.egg-info/top_level.txt new file mode 100644 index 0000000..452f8dc --- /dev/null +++ b/cssselect2.egg-info/top_level.txt @@ -0,0 +1 @@ +cssselect2 diff 
--git a/cssselect2/__init__.py b/cssselect2/__init__.py new file mode 100644 index 0000000..a4c50ec --- /dev/null +++ b/cssselect2/__init__.py @@ -0,0 +1,114 @@ +# coding: utf8 +""" + cssselect2 + ---------- + + CSS selectors for ElementTree. + + :copyright: (c) 2012 by Simon Sapin, 2017 by Guillaume Ayoub. + :license: BSD, see LICENSE for more details. + +""" + +from __future__ import unicode_literals + +import operator + +from webencodings import ascii_lower + +# Classes are imported here to expose them at the top level of the module +from .compiler import compile_selector_list # noqa +from .parser import SelectorError # noqa +from .tree import ElementWrapper # noqa + + +VERSION = '0.2.1' + + +class Matcher(object): + """A CSS selectors storage that can match against HTML elements.""" + def __init__(self): + self.id_selectors = {} + self.class_selectors = {} + self.lower_local_name_selectors = {} + self.namespace_selectors = {} + self.other_selectors = [] + self.order = 0 + + def add_selector(self, selector, payload): + """ + Add a selector and its payload to the matcher. + + :param selector: + A :class:`CompiledSelector` object. + :param payload: + Some data associated to the selector, + such as :class:`declarations <~tinycss2.ast.Declaration>` + parsed from the :attr:`~tinycss2.ast.QualifiedRule.content` + of a style rule. + It can be any Python object, + and will be returned as-is by :meth:`match`. 
+ + """ + self.order += 1 + + if selector.never_matches: + return + + entry = ( + selector.test, selector.specificity, self.order, + selector.pseudo_element, payload) + if selector.id is not None: + self.id_selectors.setdefault(selector.id, []).append(entry) + elif selector.class_name is not None: + self.class_selectors.setdefault(selector.class_name, []) \ + .append(entry) + elif selector.local_name is not None: + self.lower_local_name_selectors.setdefault( + selector.lower_local_name, []).append(entry) + elif selector.namespace is not None: + self.namespace_selectors.setdefault(selector.namespace, []) \ + .append(entry) + else: + self.other_selectors.append(entry) + + def match(self, element): + """ + Match selectors against the given element. + + :param element: + An :class:`ElementWrapper`. + :returns: + A list of ``(specificity, order, pseudo_element, payload)`` tuples + for selectors that match element, + in order of lowest to highest :attr:`~CompiledSelector.specificity` + and in order of addition with :meth:`add_selector` + among selectors of equal specificity. 
+ + """ + relevant_selectors = [] + + if element.id is not None: + relevant_selectors.append(self.id_selectors.get(element.id, [])) + + for class_name in element.classes: + relevant_selectors.append(self.class_selectors.get(class_name, [])) + + relevant_selectors.append( + self.lower_local_name_selectors.get( + ascii_lower(element.local_name), [])) + relevant_selectors.append( + self.namespace_selectors.get(element.namespace_url, [])) + relevant_selectors.append(self.other_selectors) + + results = [ + (specificity, order, pseudo, payload) + for selector_list in relevant_selectors + for test, specificity, order, pseudo, payload in selector_list + if test(element) + ] + results.sort(key=SORT_KEY) + return results + + +SORT_KEY = operator.itemgetter(0, 1) diff --git a/cssselect2/_compat.py b/cssselect2/_compat.py new file mode 100644 index 0000000..f2939da --- /dev/null +++ b/cssselect2/_compat.py @@ -0,0 +1,9 @@ +try: + basestring = basestring +except NameError: + basestring = str + +try: + from itertools import ifilter +except ImportError: + ifilter = filter diff --git a/cssselect2/compiler.py b/cssselect2/compiler.py new file mode 100644 index 0000000..72bb5da --- /dev/null +++ b/cssselect2/compiler.py @@ -0,0 +1,336 @@ +# coding: utf8 + +from __future__ import unicode_literals + +import re + +from tinycss2.nth import parse_nth +from webencodings import ascii_lower + +from . import parser +from .parser import SelectorError + +# http://dev.w3.org/csswg/selectors/#whitespace +split_whitespace = re.compile('[^ \t\r\n\f]+').findall + + +def compile_selector_list(input, namespaces=None): + """Compile a (comma-separated) list of selectors. + + :param input: + A :term:`tinycss2:string`, + or an iterable of tinycss2 :term:`tinycss2:component values` such as + the :attr:`~tinycss2.ast.QualifiedRule.prelude` of a style rule. + :param namespaces: + An optional dictionary of all `namespace prefix declarations + `_ in scope for this selector. 
+ Keys are namespace prefixes as strings, or ``None`` for the default + namespace. + Values are namespace URLs as strings. + If omitted, assume that no prefix is declared. + :returns: + A list of opaque :class:`CompiledSelector` objects. + + """ + return [ + CompiledSelector(selector) + for selector in parser.parse(input, namespaces) + ] + + +class CompiledSelector(object): + def __init__(self, parsed_selector): + source = _compile_node(parsed_selector.parsed_tree) + self.never_matches = source == '0' + self.test = eval( + 'lambda el: ' + source, + {'split_whitespace': split_whitespace, 'ascii_lower': ascii_lower}, + {}, + ) + self.specificity = parsed_selector.specificity + self.pseudo_element = parsed_selector.pseudo_element + self.id = None + self.class_name = None + self.local_name = None + self.lower_local_name = None + self.namespace = None + + node = parsed_selector.parsed_tree + if isinstance(node, parser.CombinedSelector): + node = node.right + for simple_selector in node.simple_selectors: + if isinstance(simple_selector, parser.IDSelector): + self.id = simple_selector.ident + elif isinstance(simple_selector, parser.ClassSelector): + self.class_name = simple_selector.class_name + elif isinstance(simple_selector, parser.LocalNameSelector): + self.local_name = simple_selector.local_name + self.lower_local_name = simple_selector.lower_local_name + elif isinstance(simple_selector, parser.NamespaceSelector): + self.namespace = simple_selector.namespace + + +def _compile_node(selector): + """Return a boolean expression, as a Python source string. + + When evaluated in a context where the `el` variable is an + :class:`~cssselect2.tree.Element` object, + tells whether the element is a subject of `selector`. + + """ + # To avoid precedence-related bugs, any sub-expression that is passed + # around must be "atomic": add parentheses when the top-level would be + # an operator. Bare literals and function calls are fine. 
+ + # 1 and 0 are used for True and False to avoid global lookups. + + if isinstance(selector, parser.CombinedSelector): + left_inside = _compile_node(selector.left) + if left_inside == '0': + return '0' # 0 and x == 0 + elif left_inside == '1': + # 1 and x == x, but the element matching 1 still needs to exist. + if selector.combinator in (' ', '>'): + left = 'el.parent is not None' + elif selector.combinator in ('~', '+'): + left = 'el.previous is not None' + else: + raise SelectorError('Unknown combinator', selector.combinator) + # Rebind the `el` name inside a generator-expressions (in a new scope) + # so that 'left_inside' applies to different elements. + elif selector.combinator == ' ': + left = 'any((%s) for el in el.iter_ancestors())' % left_inside + elif selector.combinator == '>': + left = ('next(el is not None and (%s) for el in [el.parent])' + % left_inside) + elif selector.combinator == '+': + left = ('next(el is not None and (%s) for el in [el.previous])' + % left_inside) + elif selector.combinator == '~': + left = ('any((%s) for el in el.iter_previous_siblings())' + % left_inside) + else: + raise SelectorError('Unknown combinator', selector.combinator) + + right = _compile_node(selector.right) + if right == '0': + return '0' # 0 and x == 0 + elif right == '1': + return left # 1 and x == x + else: + # Evaluate combinators right to left: + return '(%s) and (%s)' % (right, left) + + elif isinstance(selector, parser.CompoundSelector): + sub_expressions = [ + expr for expr in map(_compile_node, selector.simple_selectors) + if expr != '1'] + if len(sub_expressions) == 1: + test = sub_expressions[0] + elif '0' in sub_expressions: + test = '0' + elif sub_expressions: + test = ' and '.join('(%s)' % e for e in sub_expressions) + else: + test = '1' # all([]) == True + + if isinstance(selector, parser.NegationSelector): + if test == '0': + return '1' + elif test == '1': + return '0' + else: + return 'not (%s)' % test + else: + return test + + elif 
isinstance(selector, parser.LocalNameSelector): + return ('el.local_name == (%r if el.in_html_document else %r)' + % (selector.lower_local_name, selector.local_name)) + + elif isinstance(selector, parser.NamespaceSelector): + return 'el.namespace_url == %r' % selector.namespace + + elif isinstance(selector, parser.ClassSelector): + return '%r in el.classes' % selector.class_name + + elif isinstance(selector, parser.IDSelector): + return 'el.id == %r' % selector.ident + + elif isinstance(selector, parser.AttributeSelector): + if selector.namespace is not None: + if selector.namespace: + key = '(%r if el.in_html_document else %r)' % ( + '{%s}%s' % (selector.namespace, selector.lower_name), + '{%s}%s' % (selector.namespace, selector.name), + ) + else: + key = ('(%r if el.in_html_document else %r)' + % (selector.lower_name, selector.name)) + value = selector.value + if selector.operator is None: + return 'el.etree_element.get(%s) is not None' % key + elif selector.operator == '=': + return 'el.etree_element.get(%s) == %r' % (key, value) + elif selector.operator == '~=': + if len(value.split()) != 1 or value.strip() != value: + return '0' + else: + return ( + '%r in split_whitespace(el.etree_element.get(%s, ""))' + % (value, key)) + elif selector.operator == '|=': + return ('next(v == %r or (v is not None and v.startswith(%r))' + ' for v in [el.etree_element.get(%s)])' + % (value, value + '-', key)) + elif selector.operator == '^=': + if value: + return 'el.etree_element.get(%s, "").startswith(%r)' % ( + key, value) + else: + return '0' + elif selector.operator == '$=': + if value: + return 'el.etree_element.get(%s, "").endswith(%r)' % ( + key, value) + else: + return '0' + elif selector.operator == '*=': + if value: + return '%r in el.etree_element.get(%s, "")' % (value, key) + else: + return '0' + else: + raise SelectorError( + 'Unknown attribute operator', selector.operator) + else: # In any namespace + raise NotImplementedError # TODO + + elif isinstance(selector, 
parser.PseudoClassSelector): + if selector.name == 'link': + return ('%s and el.etree_element.get("href") is not None' + % html_tag_eq('a', 'area', 'link')) + elif selector.name == 'enabled': + return ( + '(%s and el.etree_element.get("disabled") is None' + ' and not el.in_disabled_fieldset) or' + '(%s and el.etree_element.get("disabled") is None) or ' + '(%s and el.etree_element.get("href") is not None)' + % ( + html_tag_eq('button', 'input', 'select', 'textarea', + 'option'), + html_tag_eq('optgroup', 'menuitem', 'fieldset'), + html_tag_eq('a', 'area', 'link'), + ) + ) + elif selector.name == 'disabled': + return ( + '(%s and (el.etree_element.get("disabled") is not None' + ' or el.in_disabled_fieldset)) or' + '(%s and el.etree_element.get("disabled") is not None)' % ( + html_tag_eq('button', 'input', 'select', 'textarea', + 'option'), + html_tag_eq('optgroup', 'menuitem', 'fieldset'), + ) + ) + elif selector.name == 'checked': + return ( + '(%s and el.etree_element.get("checked") is not None and' + ' ascii_lower(el.etree_element.get("type", "")) ' + ' in ("checkbox", "radio"))' + 'or (%s and el.etree_element.get("selected") is not None)' + % ( + html_tag_eq('input', 'menuitem'), + html_tag_eq('option'), + ) + ) + elif selector.name in ('visited', 'hover', 'active', 'focus', + 'target'): + # Not applicable in a static context: never match. 
+ return '0' + elif selector.name == 'root': + return 'el.parent is None' + elif selector.name == 'first-child': + return 'el.index == 0' + elif selector.name == 'last-child': + return 'el.index + 1 == len(el.etree_siblings)' + elif selector.name == 'first-of-type': + return ('all(s.tag != el.etree_element.tag' + ' for s in el.etree_siblings[:el.index])') + elif selector.name == 'last-of-type': + return ('all(s.tag != el.etree_element.tag' + ' for s in el.etree_siblings[el.index + 1:])') + elif selector.name == 'only-child': + return 'len(el.etree_siblings) == 1' + elif selector.name == 'only-of-type': + return ('all(s.tag != el.etree_element.tag or i == el.index' + ' for i, s in enumerate(el.etree_siblings))') + elif selector.name == 'empty': + return 'not (el.etree_children or el.etree_element.text)' + else: + raise SelectorError('Unknown pseudo-class', selector.name) + + elif isinstance(selector, parser.FunctionalPseudoClassSelector): + if selector.name == 'lang': + tokens = [ + t for t in selector.arguments + if t.type != 'whitespace' + ] + if len(tokens) == 1 and tokens[0].type == 'ident': + lang = tokens[0].lower_value + else: + raise SelectorError('Invalid arguments for :lang()') + + return ('el.lang == %r or el.lang.startswith(%r)' + % (lang, lang + '-')) + else: + if selector.name == 'nth-child': + count = 'el.index' + elif selector.name == 'nth-last-child': + count = '(len(el.etree_siblings) - el.index - 1)' + elif selector.name == 'nth-of-type': + count = ('sum(1 for s in el.etree_siblings[:el.index]' + ' if s.tag == el.etree_element.tag)') + elif selector.name == 'nth-last-of-type': + count = ('sum(1 for s in el.etree_siblings[el.index + 1:]' + ' if s.tag == el.etree_element.tag)') + else: + raise SelectorError('Unknown pseudo-class', selector.name) + + result = parse_nth(selector.arguments) + if result is None: + raise SelectorError( + 'Invalid arguments for :%s()' % selector.name) + a, b = result + # x is the number of siblings before/after the 
element + # Matches if a positive or zero integer n exists so that: + # x = a*n + b-1 + # x = a*n + B + B = b - 1 + if a == 0: + # x = B + return '%s == %i' % (count, B) + else: + # n = (x - B) / a + return ('next(r == 0 and n >= 0' + ' for n, r in [divmod(%s - %i, %i)])' + % (count, B, a)) + + else: + raise TypeError(type(selector), selector) + + +def html_tag_eq(*local_names): + if len(local_names) == 1: + return ( + '((el.local_name == %r) if el.in_html_document else ' + '(el.etree_element.tag == %r))' % ( + local_names[0], + '{http://www.w3.org/1999/xhtml}' + local_names[0])) + else: + return ( + '((el.local_name in (%s)) if el.in_html_document else ' + '(el.etree_element.tag in (%s)))' % ( + ', '.join(repr(n) for n in local_names), + ', '.join(repr('{http://www.w3.org/1999/xhtml}' + n) + for n in local_names))) diff --git a/cssselect2/parser.py b/cssselect2/parser.py new file mode 100644 index 0000000..8d63e9a --- /dev/null +++ b/cssselect2/parser.py @@ -0,0 +1,427 @@ +# coding: utf8 +""" + cssselect2.parser + ----------------- + + A parser for CSS selectors, based on the tinycss tokenizer. + + :copyright: (c) 2012 by Simon Sapin, 2017 by Guillaume Ayoub. + :license: BSD, see LICENSE for more details. + +""" + +from __future__ import unicode_literals + +from tinycss2 import parse_component_value_list + +from ._compat import basestring + +__all__ = ['parse'] + + +def parse(input, namespaces=None): + """ + :param input: + A :term:`string`, or an iterable of :term:`component values`. + """ + if isinstance(input, basestring): + input = parse_component_value_list(input) + tokens = TokenStream(input) + namespaces = namespaces or {} + yield parse_selector(tokens, namespaces) + tokens.skip_whitespace_and_comment() + while 1: + next = tokens.next() + if next is None: + return + elif next == ',': + yield parse_selector(tokens, namespaces) + else: + raise SelectorError(next, 'unpexpected %s token.' 
% next.type) + + +def parse_selector(tokens, namespaces): + result, pseudo_element = parse_compound_selector(tokens, namespaces) + while 1: + has_whitespace = tokens.skip_whitespace() + while tokens.skip_comment(): + has_whitespace = tokens.skip_whitespace() or has_whitespace + if pseudo_element is not None: + return Selector(result, pseudo_element) + peek = tokens.peek() + if peek is None or peek == ',': + return Selector(result, pseudo_element) + elif peek in ('>', '+', '~'): + combinator = peek.value + tokens.next() + elif has_whitespace: + combinator = ' ' + else: + return Selector(result, pseudo_element) + compound, pseudo_element = parse_compound_selector(tokens, namespaces) + result = CombinedSelector(result, combinator, compound) + + +def parse_compound_selector(tokens, namespaces): + type_selectors = parse_type_selector(tokens, namespaces) + simple_selectors = type_selectors if type_selectors is not None else [] + while 1: + simple_selector, pseudo_element = parse_simple_selector( + tokens, namespaces) + if pseudo_element is not None or simple_selector is None: + break + simple_selectors.append(simple_selector) + + if (simple_selectors or type_selectors is not None or + pseudo_element is not None): + return CompoundSelector(simple_selectors), pseudo_element + else: + peek = tokens.peek() + raise SelectorError(peek, 'expected a compound selector, got %s' + % (peek.type if peek else 'EOF')) + + +def parse_type_selector(tokens, namespaces): + tokens.skip_whitespace() + qualified_name = parse_qualified_name(tokens, namespaces) + if qualified_name is None: + return None + + simple_selectors = [] + namespace, local_name = qualified_name + if local_name is not None: + simple_selectors.append(LocalNameSelector(local_name)) + if namespace is not None: + simple_selectors.append(NamespaceSelector(namespace)) + return simple_selectors + + +def parse_simple_selector(tokens, namespaces, in_negation=False): + peek = tokens.peek() + if peek is None: + return None, None + 
if peek.type == 'hash' and peek.is_identifier: + tokens.next() + return IDSelector(peek.value), None + elif peek == '.': + tokens.next() + next = tokens.next() + if next is None or next.type != 'ident': + raise SelectorError( + next, 'Expected a class name, got %s' % next) + return ClassSelector(next.value), None + elif peek.type == '[] block': + tokens.next() + attr = parse_attribute_selector(TokenStream(peek.content), namespaces) + return attr, None + elif peek == ':': + tokens.next() + next = tokens.next() + if next == ':': + next = tokens.next() + if next is None or next.type != 'ident': + raise SelectorError( + next, 'Expected a pseudo-element name, got %s' % next) + return None, next.lower_value + elif next is not None and next.type == 'ident': + name = next.lower_value + if name in ('before', 'after', 'first-line', 'first-letter'): + return None, name + else: + return PseudoClassSelector(name), None + elif next is not None and next.type == 'function': + name = next.lower_name + if name == 'not': + if in_negation: + raise SelectorError(next, 'nested :not()') + return parse_negation(next, namespaces), None + else: + return ( + FunctionalPseudoClassSelector(name, next.arguments), None) + else: + raise SelectorError(next, 'unexpected %s token.' 
% next) + else: + return None, None + + +def parse_negation(negation_token, namespaces): + tokens = TokenStream(negation_token.arguments) + type_selectors = parse_type_selector(tokens, namespaces) + if type_selectors is not None: + return NegationSelector(type_selectors) + + simple_selector, pseudo_element = parse_simple_selector( + tokens, namespaces, in_negation=True) + tokens.skip_whitespace() + if pseudo_element is None and tokens.next() is None: + return NegationSelector([simple_selector]) + else: + raise SelectorError( + negation_token, ':not() only accepts a simple selector') + + +def parse_attribute_selector(tokens, namespaces): + tokens.skip_whitespace() + qualified_name = parse_qualified_name( + tokens, namespaces, is_attribute=True) + if qualified_name is None: + next = tokens.next() + raise SelectorError( + next, 'expected attribute name, got %s' % next) + namespace, local_name = qualified_name + + tokens.skip_whitespace() + peek = tokens.peek() + if peek is None: + operator = None + value = None + elif peek in ('=', '~=', '|=', '^=', '$=', '*='): + operator = peek.value + tokens.next() + tokens.skip_whitespace() + next = tokens.next() + if next is None or next.type not in ('ident', 'string'): + next_type = 'None' if next is None else next.type + raise SelectorError( + next, 'expected attribute value, got %s' % next_type) + value = next.value + else: + raise SelectorError( + peek, 'expected attribute selector operator, got %s' % peek) + + tokens.skip_whitespace() + next = tokens.next() + if next is not None: + raise SelectorError(next, 'expected ], got %s' % next.type) + return AttributeSelector(namespace, local_name, operator, value) + + +def parse_qualified_name(tokens, namespaces, is_attribute=False): + """Returns None (not a qualified name) or (ns, local), + in which None is a wildcard. The empty string for ns is "no namespace". 
+ + """ + peek = tokens.peek() + if peek is None: + return None + if peek.type == 'ident': + first_ident = tokens.next() + peek = tokens.peek() + if peek != '|': + namespace = '' if is_attribute else namespaces.get(None, None) + return namespace, (first_ident.value, first_ident.lower_value) + tokens.next() + namespace = namespaces.get(first_ident.value) + if namespace is None: + raise SelectorError( + first_ident, + 'undefined namespace prefix: ' + first_ident.value) + elif peek == '*': + next = tokens.next() + peek = tokens.peek() + if peek != '|': + if is_attribute: + raise SelectorError( + next, 'Expected local name, got %s' % next.type) + return namespaces.get(None, None), None + tokens.next() + namespace = None + elif peek == '|': + tokens.next() + namespace = '' + else: + return None + + # If we get here, we just consumed '|' and set ``namespace`` + next = tokens.next() + if next.type == 'ident': + return namespace, (next.value, next.lower_value) + elif next == '*' and not is_attribute: + return namespace, None + else: + raise SelectorError(next, 'Expected local name, got %s' % next.type) + + +class SelectorError(ValueError): + """A specialized ``ValueError`` for invalid selectors.""" + + +class TokenStream(object): + def __init__(self, tokens): + self.tokens = iter(tokens) + self.peeked = [] # In reversed order + + def next(self): + if self.peeked: + return self.peeked.pop() + else: + return next(self.tokens, None) + + def peek(self): + if not self.peeked: + self.peeked.append(next(self.tokens, None)) + return self.peeked[-1] + + def skip(self, skip_types): + found = False + while 1: + peek = self.peek() + if peek is None or peek.type not in skip_types: + break + self.next() + found = True + return found + + def skip_whitespace(self): + return self.skip(['whitespace']) + + def skip_comment(self): + return self.skip(['comment']) + + def skip_whitespace_and_comment(self): + return self.skip(['comment', 'whitespace']) + + +class Selector(object): + def 
__init__(self, tree, pseudo_element=None): + self.parsed_tree = tree + if pseudo_element is None: + self.pseudo_element = pseudo_element + #: Tuple of 3 integers: http://www.w3.org/TR/selectors/#specificity + self.specificity = tree.specificity + else: + self.pseudo_element = pseudo_element + a, b, c = tree.specificity + self.specificity = a, b, c + 1 + + def __repr__(self): + if self.pseudo_element is None: + return repr(self.parsed_tree) + else: + return '%r::%s' % (self.parsed_tree, self.pseudo_element) + + +class CombinedSelector(object): + def __init__(self, left, combinator, right): + #: Combined or compound selector + self.left = left + # One of `` `` (a single space), ``>``, ``+`` or ``~``. + self.combinator = combinator + #: compound selector + self.right = right + + @property + def specificity(self): + a1, b1, c1 = self.left.specificity + a2, b2, c2 = self.right.specificity + return a1 + a2, b1 + b2, c1 + c2 + + def __repr__(self): + return '%r%s%r' % (self.left, self.combinator, self.right) + + +class CompoundSelector(object): + """Aka. sequence of simple selectors, in Level 3.""" + def __init__(self, simple_selectors): + self.simple_selectors = simple_selectors + + @property + def specificity(self): + if self.simple_selectors: + # zip(*foo) turns [(a1, b1, c1), (a2, b2, c2), ...] + # into [(a1, a2, ...), (b1, b2, ...), (c1, c2, ...)] + return tuple(map(sum, zip( + *(sel.specificity for sel in self.simple_selectors)))) + else: + return 0, 0, 0 + + def __repr__(self): + return ''.join(map(repr, self.simple_selectors)) + + +class LocalNameSelector(object): + specificity = 0, 0, 1 + + def __init__(self, local_name): + self.local_name, self.lower_local_name = local_name + + def __repr__(self): + return self.local_name + + +class NamespaceSelector(object): + specificity = 0, 0, 0 + + def __init__(self, namespace): + #: The namespace URL as a string, + #: or the empty string for elements not in any namespace. 
+ self.namespace = namespace + + def __repr__(self): + if self.namespace == '': + return '|' + else: + return '{%s}|' % self.namespace + + +class IDSelector(object): + specificity = 1, 0, 0 + + def __init__(self, ident): + self.ident = ident + + def __repr__(self): + return '#' + self.ident + + +class ClassSelector(object): + specificity = 0, 1, 0 + + def __init__(self, class_name): + self.class_name = class_name + + def __repr__(self): + return '.' + self.class_name + + +class AttributeSelector(object): + specificity = 0, 1, 0 + + def __init__(self, namespace, name, operator, value): + self.namespace = namespace + self.name, self.lower_name = name + #: A string like ``=`` or ``~=``, or None for ``[attr]`` selectors + self.operator = operator + #: A string, or None for ``[attr]`` selectors + self.value = value + + def __repr__(self): + namespace = ('*|' if self.namespace is None + else '{%s}' % self.namespace) + return '[%s%s%s%r]' % (namespace, self.name, self.operator, self.value) + + +class PseudoClassSelector(object): + specificity = 0, 1, 0 + + def __init__(self, name): + self.name = name + + def __repr__(self): + return ':' + self.name + + +class FunctionalPseudoClassSelector(object): + specificity = 0, 1, 0 + + def __init__(self, name, arguments): + self.name = name + self.arguments = arguments + + def __repr__(self): + return ':%s%r' % (self.name, tuple(self.arguments)) + + +class NegationSelector(CompoundSelector): + def __repr__(self): + return ':not(%r)' % CompoundSelector.__repr__(self) diff --git a/cssselect2/tests/LICENSE b/cssselect2/tests/LICENSE new file mode 100644 index 0000000..78a9256 --- /dev/null +++ b/cssselect2/tests/LICENSE @@ -0,0 +1,4 @@ +These files are taken form the web-platform-test repository +and used under a 3-clause BSD License. 
+ +https://github.com/w3c/web-platform-tests/tree/master/selectors-api diff --git a/cssselect2/tests/__init__.py b/cssselect2/tests/__init__.py new file mode 100644 index 0000000..655718d --- /dev/null +++ b/cssselect2/tests/__init__.py @@ -0,0 +1,11 @@ +# coding: utf8 +""" + cssselect2.tests + ---------------- + + Test suite for cssselect2. + + :copyright: (c) 2012 by Simon Sapin, 2017 by Guillaume Ayoub. + :license: BSD, see LICENSE for more details. + +""" diff --git a/cssselect2/tests/content.xhtml b/cssselect2/tests/content.xhtml new file mode 100644 index 0000000..a7a2ee3 --- /dev/null +++ b/cssselect2/tests/content.xhtml @@ -0,0 +1,372 @@ + + + + Selectors-API Test Suite: HTML with Selectors Level 2 using TestHarness: Test Document + + + + + + + +
+
+ +
+

Universal selector tests inside element with id="universal".

+
+
Some preformatted text with some embedded code
+

This is a normal link: W3C

+
Some more nested elements code hyperlink
+
+ +
+
+
+
+
+

+

+		
+
    + + + + +
    + +
    +
    +
    +
    +
    + +
    + + + + + + + + + +
    + +
    +
    + +
    +
    +
    +
    + +
    +
    + + + + + + + + + +

    +
    + +
    +
    +
    +
    +
    +
    + +
    + + + + +
    +
    +
    +
    +
    + +

    +
    + +
    + + + + +
    +
    +
    +
    + +

    +
    + +
    + + + + +
    +
    +
    +
    +
    +
    + +

    +
    + +
    + + + + +
    + +
      +
    1. +
    2. +
    3. +
    4. +
    5. +
    6. +
    7. +
    8. +
    9. +
    10. +
    11. +
    12. +
    + +

    + span1 + em1 + + em2 + span2 + strong1 + em3 + span3 + span4 + strong2 + em4 +

    +
    + +
    +
    +
    +
    + +

    +

    +

    +
    + +
    +

    +

    +

    + +
    +
    +
    +
    + +
    +

    + +

    +

    + + +

    +

    + + + +

    +
    > + +
    +

    +

    +

    +

    Text node

    +

    +
    + + + +
    +
    +
    +
    +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    +
    +
    +
    + +

    +

    +

    +
    + +
    All pseudo-element tests
    + +
    +

    +

    +

    + + +
    +
    +

    +
    +

    +
    +
    +
    +
    + + + + + + +
    + +
    +
    +
    + +
      +
    • +
    • +
    • +
    • +
    + + + + + + +
    + +
    +
    +
    +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    +
    +
    +
    +

    +

    +
    + +
    +
    +
    +
    +
    +
    +

    +
    +
    +
    +

    +

    +
    + +
    + + +
    +
    + + diff --git a/cssselect2/tests/ids.html b/cssselect2/tests/ids.html new file mode 100644 index 0000000..22c7e3b --- /dev/null +++ b/cssselect2/tests/ids.html @@ -0,0 +1,48 @@ + + + + +
    + + + + link +
      +
    1. content
    2. +
    3. +
      +
      +
    4. +
    5. +
    6. +
    7. +
    8. +
    9. +
    +

    + hi there + guy + + + + + + + +

    + + +
    +

    +
      +
    + + + + +
    +
    + diff --git a/cssselect2/tests/invalid_selectors.json b/cssselect2/tests/invalid_selectors.json new file mode 100644 index 0000000..a10ff0c --- /dev/null +++ b/cssselect2/tests/invalid_selectors.json @@ -0,0 +1,36 @@ +[ + {"name": "Empty String", "selector": ""}, + {"name": "Invalid character", "selector": "["}, + {"name": "Invalid character", "selector": "]"}, + {"name": "Invalid character", "selector": "("}, + {"name": "Invalid character", "selector": ")"}, + {"name": "Invalid character", "selector": "{"}, + {"name": "Invalid character", "selector": "}"}, + {"name": "Invalid character", "selector": "<"}, + {"name": "Invalid character", "selector": ">"}, + {"name": "Invalid character", "selector": ":"}, + {"name": "Invalid character", "selector": "::"}, + {"name": "Invalid ID", "selector": "#"}, + {"name": "Invalid group of selectors", "selector": "div,"}, + {"name": "Invalid class", "selector": "."}, + {"name": "Invalid class", "selector": ".5cm"}, + {"name": "Invalid class", "selector": "..test"}, + {"name": "Invalid class", "selector": ".foo..quux"}, + {"name": "Invalid class", "selector": ".bar."}, + {"name": "Invalid combinator", "selector": "div & address, p"}, + {"name": "Invalid combinator", "selector": "div >> address, p"}, + {"name": "Invalid combinator", "selector": "div ++ address, p"}, + {"name": "Invalid combinator", "selector": "div ~~ address, p"}, + {"name": "Invalid [att=value] selector", "selector": "[*=test]"}, + {"name": "Invalid [att=value] selector", "selector": "[*|*=test]"}, + {"name": "Invalid [att=value] selector", "selector": "[class= space unquoted ]"}, + {"name": "Unknown pseudo-class", "selector": "div:example"}, + {"name": "Unknown pseudo-class", "selector": ":example"}, + {"name": "Unknown pseudo-element", "selector": "div::example", "xfail": true}, + {"name": "Unknown pseudo-element", "selector": "::example", "xfail": true}, + {"name": "Invalid pseudo-element", "selector": ":::before"}, + {"name": "Undeclared namespace", 
"selector": "ns|div"}, + {"name": "Undeclared namespace", "selector": ":not(ns|div)"}, + {"name": "Invalid namespace", "selector": "^|div"}, + {"name": "Invalid namespace", "selector": "$|div"} +] diff --git a/cssselect2/tests/make_valid_selectors_json.sh b/cssselect2/tests/make_valid_selectors_json.sh new file mode 100755 index 0000000..161ab9d --- /dev/null +++ b/cssselect2/tests/make_valid_selectors_json.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +WEB_PLATFORM_TESTS="$1" + +if [ -f "$WEB_PLATFORM_TESTS/selectors-api/selectors.js" ] +then + ( + cat "$WEB_PLATFORM_TESTS/selectors-api/selectors.js" + echo "validSelectors.map(function(selector) {" + echo " delete selector.testType;" + echo "});" + echo "console.log(JSON.stringify(validSelectors, null, ' '))" + ) | node +else + echo "Usage: $0 path/to/web-plateform-test" + exit; +fi diff --git a/cssselect2/tests/shakespeare.html b/cssselect2/tests/shakespeare.html new file mode 100644 index 0000000..8a42029 --- /dev/null +++ b/cssselect2/tests/shakespeare.html @@ -0,0 +1,307 @@ + + + + + + +
    +
    +

    As You Like It

    +
    + by William Shakespeare +
    +
    +

    ACT I, SCENE III. A room in the palace.

    +
    +
    Enter CELIA and ROSALIND
    +
    +
    CELIA
    +
    +
    Why, cousin! why, Rosalind! Cupid have mercy! not a word?
    +
    +
    ROSALIND
    +
    +
    Not one to throw at a dog.
    +
    +
    CELIA
    +
    +
    No, thy words are too precious to be cast away upon
    +
    curs; throw some of them at me; come, lame me with reasons.
    +
    +
    ROSALIND
    +
    CELIA
    +
    +
    But is all this for your father?
    +
    +
    +
    Then there were two cousins laid up; when the one
    +
    should be lamed with reasons and the other mad
    +
    without any.
    +
    +
    ROSALIND
    +
    +
    No, some of it is for my child's father. O, how
    +
    full of briers is this working-day world!
    +
    +
    CELIA
    +
    +
    They are but burs, cousin, thrown upon thee in
    +
    holiday foolery: if we walk not in the trodden
    +
    paths our very petticoats will catch them.
    +
    +
    ROSALIND
    +
    +
    I could shake them off my coat: these burs are in my heart.
    +
    +
    CELIA
    +
    +
    Hem them away.
    +
    +
    ROSALIND
    +
    +
    I would try, if I could cry 'hem' and have him.
    +
    +
    CELIA
    +
    +
    Come, come, wrestle with thy affections.
    +
    +
    ROSALIND
    +
    +
    O, they take the part of a better wrestler than myself!
    +
    +
    CELIA
    +
    +
    O, a good wish upon you! you will try in time, in
    +
    despite of a fall. But, turning these jests out of
    +
    service, let us talk in good earnest: is it
    +
    possible, on such a sudden, you should fall into so
    +
    strong a liking with old Sir Rowland's youngest son?
    +
    +
    ROSALIND
    +
    +
    The duke my father loved his father dearly.
    +
    +
    CELIA
    +
    +
    Doth it therefore ensue that you should love his son
    +
    dearly? By this kind of chase, I should hate him,
    +
    for my father hated his father dearly; yet I hate
    +
    not Orlando.
    +
    +
    ROSALIND
    +
    +
    No, faith, hate him not, for my sake.
    +
    +
    CELIA
    +
    +
    Why should I not? doth he not deserve well?
    +
    +
    ROSALIND
    +
    +
    Let me love him for that, and do you love him
    +
    because I do. Look, here comes the duke.
    +
    +
    CELIA
    +
    +
    With his eyes full of anger.
    +
    Enter DUKE FREDERICK, with Lords
    +
    +
    DUKE FREDERICK
    +
    +
    Mistress, dispatch you with your safest haste
    +
    And get you from our court.
    +
    +
    ROSALIND
    +
    +
    Me, uncle?
    +
    +
    DUKE FREDERICK
    +
    +
    You, cousin
    +
    Within these ten days if that thou be'st found
    +
    So near our public court as twenty miles,
    +
    Thou diest for it.
    +
    +
    ROSALIND
    +
    +
    I do beseech your grace,
    +
    Let me the knowledge of my fault bear with me:
    +
    If with myself I hold intelligence
    +
    Or have acquaintance with mine own desires,
    +
    If that I do not dream or be not frantic,--
    +
    As I do trust I am not--then, dear uncle,
    +
    Never so much as in a thought unborn
    +
    Did I offend your highness.
    +
    +
    DUKE FREDERICK
    +
    +
    Thus do all traitors:
    +
    If their purgation did consist in words,
    +
    They are as innocent as grace itself:
    +
    Let it suffice thee that I trust thee not.
    +
    +
    ROSALIND
    +
    +
    Yet your mistrust cannot make me a traitor:
    +
    Tell me whereon the likelihood depends.
    +
    +
    DUKE FREDERICK
    +
    +
    Thou art thy father's daughter; there's enough.
    +
    +
    ROSALIND
    +
    +
    So was I when your highness took his dukedom;
    +
    So was I when your highness banish'd him:
    +
    Treason is not inherited, my lord;
    +
    Or, if we did derive it from our friends,
    +
    What's that to me? my father was no traitor:
    +
    Then, good my liege, mistake me not so much
    +
    To think my poverty is treacherous.
    +
    +
    CELIA
    +
    +
    Dear sovereign, hear me speak.
    +
    +
    DUKE FREDERICK
    +
    +
    Ay, Celia; we stay'd her for your sake,
    +
    Else had she with her father ranged along.
    +
    +
    CELIA
    +
    +
    I did not then entreat to have her stay;
    +
    It was your pleasure and your own remorse:
    +
    I was too young that time to value her;
    +
    But now I know her: if she be a traitor,
    +
    Why so am I; we still have slept together,
    +
    Rose at an instant, learn'd, play'd, eat together,
    +
    And wheresoever we went, like Juno's swans,
    +
    Still we went coupled and inseparable.
    +
    +
    DUKE FREDERICK
    +
    +
    She is too subtle for thee; and her smoothness,
    +
    Her very silence and her patience
    +
    Speak to the people, and they pity her.
    +
    Thou art a fool: she robs thee of thy name;
    +
    And thou wilt show more bright and seem more virtuous
    +
    When she is gone. Then open not thy lips:
    +
    Firm and irrevocable is my doom
    +
    Which I have pass'd upon her; she is banish'd.
    +
    +
    CELIA
    +
    +
    Pronounce that sentence then on me, my liege:
    +
    I cannot live out of her company.
    +
    +
    DUKE FREDERICK
    +
    +
    You are a fool. You, niece, provide yourself:
    +
    If you outstay the time, upon mine honour,
    +
    And in the greatness of my word, you die.
    +
    Exeunt DUKE FREDERICK and Lords
    +
    +
    CELIA
    +
    +
    O my poor Rosalind, whither wilt thou go?
    +
    Wilt thou change fathers? I will give thee mine.
    +
    I charge thee, be not thou more grieved than I am.
    +
    +
    ROSALIND
    +
    +
    I have more cause.
    +
    +
    CELIA
    +
    +
    Thou hast not, cousin;
    +
    Prithee be cheerful: know'st thou not, the duke
    +
    Hath banish'd me, his daughter?
    +
    +
    ROSALIND
    +
    +
    That he hath not.
    +
    +
    CELIA
    +
    +
    No, hath not? Rosalind lacks then the love
    +
    Which teacheth thee that thou and I am one:
    +
    Shall we be sunder'd? shall we part, sweet girl?
    +
    No: let my father seek another heir.
    +
    Therefore devise with me how we may fly,
    +
    Whither to go and what to bear with us;
    +
    And do not seek to take your change upon you,
    +
    To bear your griefs yourself and leave me out;
    +
    For, by this heaven, now at our sorrows pale,
    +
    Say what thou canst, I'll go along with thee.
    +
    +
    ROSALIND
    +
    +
    Why, whither shall we go?
    +
    +
    CELIA
    +
    +
    To seek my uncle in the forest of Arden.
    +
    +
    ROSALIND
    +
    +
    Alas, what danger will it be to us,
    +
    Maids as we are, to travel forth so far!
    +
    Beauty provoketh thieves sooner than gold.
    +
    +
    CELIA
    +
    +
    I'll put myself in poor and mean attire
    +
    And with a kind of umber smirch my face;
    +
    The like do you: so shall we pass along
    +
    And never stir assailants.
    +
    +
    ROSALIND
    +
    +
    Were it not better,
    +
    Because that I am more than common tall,
    +
    That I did suit me all points like a man?
    +
    A gallant curtle-axe upon my thigh,
    +
    A boar-spear in my hand; and--in my heart
    +
    Lie there what hidden woman's fear there will--
    +
    We'll have a swashing and a martial outside,
    +
    As many other mannish cowards have
    +
    That do outface it with their semblances.
    +
    +
    CELIA
    +
    +
    What shall I call thee when thou art a man?
    +
    +
    ROSALIND
    +
    +
    I'll have no worse a name than Jove's own page;
    +
    And therefore look you call me Ganymede.
    +
    But what will you be call'd?
    +
    +
    CELIA
    +
    +
    Something that hath a reference to my state
    +
    No longer Celia, but Aliena.
    +
    +
    ROSALIND
    +
    +
    But, cousin, what if we assay'd to steal
    +
    The clownish fool out of your father's court?
    +
    Would he not be a comfort to our travel?
    +
    +
    CELIA
    +
    +
    He'll go along o'er the wide world with me;
    +
    Leave me alone to woo him. Let's away,
    +
    And get our jewels and our wealth together,
    +
    Devise the fittest time and safest way
    +
    To hide us from pursuit that will be made
    +
    After my flight. Now go we in content
    +
    To liberty and not to banishment.
    +
    Exeunt
    +
    +
    +
    +
    + + diff --git a/cssselect2/tests/test_cssselect2.py b/cssselect2/tests/test_cssselect2.py new file mode 100644 index 0000000..2e9dbc4 --- /dev/null +++ b/cssselect2/tests/test_cssselect2.py @@ -0,0 +1,317 @@ +# coding: utf8 +""" + cssselect2.tests + ---------------- + + Test suite for cssselect2. + + :copyright: (c) 2012 by Simon Sapin, 2017 by Guillaume Ayoub. + :license: BSD, see LICENSE for more details. + +""" + +import json +import os.path +import xml.etree.ElementTree as etree + +import pytest + +from cssselect2 import ElementWrapper, SelectorError, compile_selector_list + + +def resource(filename): + return os.path.join(os.path.dirname(__file__), filename) + + +def load_json(filename): + return json.load(open(resource(filename))) + + +def get_test_document(): + document = etree.parse(resource('content.xhtml')) + parent = next(e for e in document.getiterator() if e.get('id') == 'root') + + # Setup namespace tests + for id in ('any-namespace', 'no-namespace'): + div = etree.SubElement(parent, '{http://www.w3.org/1999/xhtml}div') + div.set('id', id) + etree.SubElement(div, '{http://www.w3.org/1999/xhtml}div') \ + .set('id', id + '-div1') + etree.SubElement(div, '{http://www.w3.org/1999/xhtml}div') \ + .set('id', id + '-div2') + etree.SubElement(div, 'div').set('id', id + '-div3') + etree.SubElement(div, '{http://www.example.org/ns}div') \ + .set('id', id + '-div4') + + return document + + +TEST_DOCUMENT = get_test_document() + + +@pytest.mark.parametrize('test', load_json('invalid_selectors.json')) +def test_invalid_selectors(test): + if test.get('xfail'): + pytest.xfail() + try: + compile_selector_list(test['selector']) + except SelectorError: + pass + else: + raise AssertionError('Should be invalid: %(selector)r %(name)s' % test) + + +@pytest.mark.parametrize('test', load_json('valid_selectors.json')) +def test_valid_selectors(test): + if test.get('xfail'): + pytest.xfail() + exclude = test.get('exclude', ()) + if 'document' in exclude or 'xhtml' in 
exclude: + return + root = ElementWrapper.from_xml_root(TEST_DOCUMENT) + result = [e.id for e in root.query_all(test['selector'])] + if result != test['expect']: + print(test['selector']) + print(result) + print('!=') + print(test['expect']) + raise AssertionError(test['name']) + + +def test_lang(): + doc = etree.fromstring(''' + + ''') + assert not ElementWrapper.from_xml_root(doc).matches(':lang(fr)') + + doc = etree.fromstring(''' + + + + ''') + root = ElementWrapper.from_xml_root(doc, content_language='en') + assert root.matches(':lang(fr)') + + doc = etree.fromstring(''' + + + + ''') + root = ElementWrapper.from_xml_root(doc, content_language='en') + assert root.matches(':lang(en)') + + doc = etree.fromstring('') + root = ElementWrapper.from_xml_root(doc, content_language='en') + assert root.matches(':lang(en)') + + root = ElementWrapper.from_xml_root(doc, content_language='en, es') + assert not root.matches(':lang(en)') + + root = ElementWrapper.from_xml_root(doc) + assert not root.matches(':lang(en)') + + doc = etree.fromstring('') + root = ElementWrapper.from_html_root(doc) + assert root.matches(':lang(en)') + + doc = etree.fromstring('') + root = ElementWrapper.from_xml_root(doc) + assert not root.matches(':lang(en)') + + +def test_select(): + root = etree.fromstring(HTML_IDS) + + def select_ids(selector, html_only): + xml_ids = [element.etree_element.get('id', 'nil') for element in + ElementWrapper.from_xml_root(root).query_all(selector)] + html_ids = [element.etree_element.get('id', 'nil') for element in + ElementWrapper.from_html_root(root).query_all(selector)] + if html_only: + assert xml_ids == [] + else: + assert xml_ids == html_ids + return html_ids + + def pcss(main, *selectors, **kwargs): + html_only = kwargs.pop('html_only', False) + result = select_ids(main, html_only) + for selector in selectors: + assert select_ids(selector, html_only) == result + return result + + all_ids = pcss('*') + assert all_ids[:6] == [ + 'html', 'nil', 'link-href', 
'link-nohref', 'nil', 'outer-div'] + assert all_ids[-1:] == ['foobar-span'] + assert pcss('div') == ['outer-div', 'li-div', 'foobar-div'] + assert pcss('DIV', html_only=True) == [ + 'outer-div', 'li-div', 'foobar-div'] # case-insensitive in HTML + assert pcss('div div') == ['li-div'] + assert pcss('div, div div') == ['outer-div', 'li-div', 'foobar-div'] + assert pcss('div , div div') == ['outer-div', 'li-div', 'foobar-div'] + assert pcss('a[name]') == ['name-anchor'] + assert pcss('a[NAme]', html_only=True) == [ + 'name-anchor'] # case-insensitive in HTML: + assert pcss('a[rel]') == ['tag-anchor', 'nofollow-anchor'] + assert pcss('a[rel="tag"]') == ['tag-anchor'] + assert pcss('a[href*="localhost"]') == ['tag-anchor'] + assert pcss('a[href*=""]') == [] + assert pcss('a[href^="http"]') == ['tag-anchor', 'nofollow-anchor'] + assert pcss('a[href^="http:"]') == ['tag-anchor'] + assert pcss('a[href^=""]') == [] + assert pcss('a[href$="org"]') == ['nofollow-anchor'] + assert pcss('a[href$=""]') == [] + assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [ + 'foobar-div'] + assert pcss('[foobar~="ab bc"]', + '[foobar~=""]', '[foobar~=" \t"]') == [] + assert pcss('div[foobar~="cd"]') == [] + assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li'] + # Attribute values are case sensitive + assert pcss('*[lang|="en"]', '[lang|="en-US"]') == [] + assert pcss('*[lang|="e"]') == [] + # ... :lang() is not. 
+ assert pcss( + ':lang(EN)', '*:lang(en-US)' + ':lang(En)' + ) == ['second-li', 'li-div'] + assert pcss(':lang(e)' # , html_only=True + ) == [] + assert pcss('li:nth-child(3)') == ['third-li'] + assert pcss('li:nth-child(10)') == [] + assert pcss('li:nth-child(2n)', 'li:nth-child(even)', + 'li:nth-child(2n+0)') == [ + 'second-li', 'fourth-li', 'sixth-li'] + assert pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)') == [ + 'first-li', 'third-li', 'fifth-li', 'seventh-li'] + assert pcss('li:nth-child(2n+4)') == ['fourth-li', 'sixth-li'] + assert pcss('li:nth-child(3n+1)') == [ + 'first-li', 'fourth-li', 'seventh-li'] + assert pcss('li:nth-last-child(1)') == ['seventh-li'] + assert pcss('li:nth-last-child(0)') == [] + assert pcss('li:nth-last-child(2n+2)', 'li:nth-last-child(even)') == [ + 'second-li', 'fourth-li', 'sixth-li'] + assert pcss('li:nth-last-child(2n+4)') == ['second-li', 'fourth-li'] + assert pcss('ol:first-of-type') == ['first-ol'] + assert pcss('ol:nth-child(1)') == [] + assert pcss('ol:nth-of-type(2)') == ['second-ol'] + assert pcss('ol:nth-last-of-type(2)') == ['first-ol'] + assert pcss('span:only-child') == ['foobar-span'] + assert pcss('div:only-child') == ['li-div'] + assert pcss('div *:only-child') == ['li-div', 'foobar-span'] + assert pcss('p *:only-of-type') == ['p-em', 'fieldset'] + assert pcss('p:only-of-type') == ['paragraph'] + assert pcss('a:empty', 'a:EMpty') == ['name-anchor'] + assert pcss('li:empty') == [ + 'third-li', 'fourth-li', 'fifth-li', 'sixth-li'] + assert pcss(':root', 'html:root') == ['html'] + assert pcss('li:root', '* :root') == [] + assert pcss('.a', '.b', '*.a', 'ol.a') == ['first-ol'] + assert pcss('.c', '*.c') == ['first-ol', 'third-li', 'fourth-li'] + assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [ + 'third-li', 'fourth-li'] + assert pcss('#first-li', 'li#first-li', '*#first-li') == ['first-li'] + assert pcss('li div', 'li > div', 'div div') == ['li-div'] + assert pcss('div > div') == [] + assert 
pcss('div>.c', 'div > .c') == ['first-ol'] + assert pcss('div + div') == ['foobar-div'] + assert pcss('a ~ a') == ['tag-anchor', 'nofollow-anchor'] + assert pcss('a[rel="tag"] ~ a') == ['nofollow-anchor'] + assert pcss('ol#first-ol li:last-child') == ['seventh-li'] + assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li'] + assert pcss('#outer-div:first-child') == ['outer-div'] + assert pcss('#outer-div :first-child') == [ + 'name-anchor', 'first-li', 'li-div', 'p-b', + 'checkbox-fieldset-disabled', 'area-href'] + assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor'] + assert pcss(':not(*)') == [] + assert pcss('a:not([href])') == ['name-anchor'] + assert pcss('ol :Not([class])') == [ + 'first-li', 'second-li', 'li-div', + 'fifth-li', 'sixth-li', 'seventh-li'] + # Invalid characters in XPath element names, should not crash + assert pcss(r'di\a0 v', r'div\[') == [] + assert pcss(r'[h\a0 ref]', r'[h\]ref]') == [] + + assert pcss(':link') == [ + 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'] + assert pcss('HTML :link', html_only=True) == [ + 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'] + assert pcss(':visited') == [] + assert pcss(':enabled') == [ + 'link-href', 'tag-anchor', 'nofollow-anchor', + 'checkbox-unchecked', 'text-checked', 'input-hidden', + 'checkbox-checked', 'area-href'] + assert pcss(':disabled') == [ + 'checkbox-disabled', 'input-hidden-disabled', + 'checkbox-disabled-checked', 'fieldset', + 'checkbox-fieldset-disabled', + 'hidden-fieldset-disabled'] + assert pcss(':checked') == [ + 'checkbox-checked', 'checkbox-disabled-checked'] + + +def test_select_shakespeare(): + document = etree.fromstring(HTML_SHAKESPEARE) + body = document.find('.//{http://www.w3.org/1999/xhtml}body') + body = ElementWrapper.from_xml_root(body) + + def count(selector): + return sum(1 for _ in body.query_all(selector)) + + # Data borrowed from http://mootools.net/slickspeed/ + + # # Changed from original; probably because I'm only + # # 
searching the body. + # assert count('*') == 252 + assert count('*') == 246 + # assert count('div:contains(CELIA)') == 26 + assert count('div:only-child') == 22 # ? + assert count('div:nth-child(even)') == 106 + assert count('div:nth-child(2n)') == 106 + assert count('div:nth-child(odd)') == 137 + assert count('div:nth-child(2n+1)') == 137 + assert count('div:nth-child(n)') == 243 + assert count('div:last-child') == 53 + assert count('div:first-child') == 51 + assert count('div > div') == 242 + assert count('div + div') == 190 + assert count('div ~ div') == 190 + assert count('body') == 1 + assert count('body div') == 243 + assert count('div') == 243 + assert count('div div') == 242 + assert count('div div div') == 241 + assert count('div, div, div') == 243 + assert count('div, a, span') == 243 + assert count('.dialog') == 51 + assert count('div.dialog') == 51 + assert count('div .dialog') == 51 + assert count('div.character, div.dialog') == 99 + assert count('div.direction.dialog') == 0 + assert count('div.dialog.direction') == 0 + assert count('div.dialog.scene') == 1 + assert count('div.scene.scene') == 1 + assert count('div.scene .scene') == 0 + assert count('div.direction .dialog ') == 0 + assert count('div .dialog .direction') == 4 + assert count('div.dialog .dialog .direction') == 4 + assert count('#speech5') == 1 + assert count('div#speech5') == 1 + assert count('div #speech5') == 1 + assert count('div.scene div.dialog') == 49 + assert count('div#scene1 div.dialog div') == 142 + assert count('#scene1 #speech1') == 1 + assert count('div[class]') == 103 + assert count('div[class=dialog]') == 50 + assert count('div[class^=dia]') == 51 + assert count('div[class$=log]') == 50 + assert count('div[class*=sce]') == 1 + assert count('div[class|=dialog]') == 50 # ? Seems right + # assert count('div[class!=madeup]') == 243 # ? Seems right + assert count('div[class~=dialog]') == 51 # ? 
Seems right + + +HTML_IDS = open(resource('ids.html')).read() +HTML_SHAKESPEARE = open(resource('shakespeare.html')).read() diff --git a/cssselect2/tests/valid_selectors.json b/cssselect2/tests/valid_selectors.json new file mode 100644 index 0000000..159741b --- /dev/null +++ b/cssselect2/tests/valid_selectors.json @@ -0,0 +1,1711 @@ +[ +{ + "name": "Type selector, matching html element", + "selector": "html", + "expect": [ + "html" + ], + "exclude": [ + "element", + "fragment", + "detached" + ], + "level": 1 +}, +{ + "name": "Type selector, matching html element", + "selector": "html", + "expect": [], + "exclude": [ + "document" + ], + "level": 1 +}, +{ + "name": "Type selector, matching body element", + "selector": "body", + "expect": [ + "body" + ], + "exclude": [ + "element", + "fragment", + "detached" + ], + "level": 1 +}, +{ + "name": "Type selector, matching body element", + "selector": "body", + "expect": [], + "exclude": [ + "document" + ], + "level": 1 +}, +{ + "name": "Universal selector, matching all children of element with specified ID", + "selector": "#universal>*", + "expect": [ + "universal-p1", + "universal-hr1", + "universal-pre1", + "universal-p2", + "universal-address1" + ], + "level": 2 +}, +{ + "name": "Universal selector, matching all grandchildren of element with specified ID", + "selector": "#universal>*>*", + "expect": [ + "universal-code1", + "universal-span1", + "universal-a1", + "universal-code2" + ], + "level": 2 +}, +{ + "name": "Universal selector, matching all children of empty element with specified ID", + "selector": "#empty>*", + "expect": [], + "level": 2 +}, +{ + "name": "Universal selector, matching all descendants of element with specified ID", + "selector": "#universal *", + "expect": [ + "universal-p1", + "universal-code1", + "universal-hr1", + "universal-pre1", + "universal-span1", + "universal-p2", + "universal-a1", + "universal-address1", + "universal-code2", + "universal-a2" + ], + "level": 2 +}, +{ + "name": 
"Attribute presence selector, matching align attribute with value", + "selector": ".attr-presence-div1[align]", + "expect": [ + "attr-presence-div1" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, matching align attribute with empty value", + "selector": ".attr-presence-div2[align]", + "expect": [ + "attr-presence-div2" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, matching title attribute, case insensitivity", + "selector": "#attr-presence [TiTlE]", + "expect": [ + "attr-presence-a1", + "attr-presence-span1" + ], + "exclude": [ + "xhtml" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, not matching title attribute, case sensitivity", + "selector": "#attr-presence [TiTlE]", + "expect": [], + "exclude": [ + "html" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, matching custom data-* attribute", + "selector": "[data-attr-presence]", + "expect": [ + "attr-presence-pre1", + "attr-presence-blockquote1" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, not matching attribute with similar name", + "selector": ".attr-presence-div3[align], .attr-presence-div4[align]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute presence selector, matching attribute with non-ASCII characters", + "selector": "ul[data-中文]", + "expect": [ + "attr-presence-ul1" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, not matching default option without selected attribute", + "selector": "#attr-presence-select1 option[selected]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute presence selector, matching option with selected attribute", + "selector": "#attr-presence-select2 option[selected]", + "expect": [ + "attr-presence-select2-option4" + ], + "level": 2 +}, +{ + "name": "Attribute presence selector, matching multiple options with selected attributes", + "selector": "#attr-presence-select3 option[selected]", + "expect": [ + "attr-presence-select3-option2", + 
"attr-presence-select3-option3" + ], + "level": 2 +}, +{ + "name": "Attribute value selector, matching align attribute with value", + "selector": "#attr-value [align=\"center\"]", + "expect": [ + "attr-value-div1" + ], + "level": 2 +}, +{ + "name": "Attribute value selector, matching align attribute with empty value", + "selector": "#attr-value [align=\"\"]", + "expect": [ + "attr-value-div2" + ], + "level": 2 +}, +{ + "name": "Attribute value selector, not matching align attribute with partial value", + "selector": "#attr-value [align=\"c\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute value selector, not matching align attribute with incorrect value", + "selector": "#attr-value [align=\"centera\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute value selector, matching custom data-* attribute with unicode escaped value", + "selector": "[data-attr-value=\"\\e9\"]", + "expect": [ + "attr-value-div3" + ], + "level": 2 +}, +{ + "name": "Attribute value selector, matching custom data-* attribute with escaped character", + "selector": "[data-attr-value_foo=\"\\e9\"]", + "expect": [ + "attr-value-div4" + ], + "level": 2 +}, +{ + "name": "Attribute value selector with single-quoted value, matching multiple inputs with type attributes", + "selector": "#attr-value input[type='hidden'],#attr-value input[type='radio']", + "expect": [ + "attr-value-input3", + "attr-value-input4", + "attr-value-input6", + "attr-value-input8", + "attr-value-input9" + ], + "level": 2 +}, +{ + "name": "Attribute value selector with double-quoted value, matching multiple inputs with type attributes", + "selector": "#attr-value input[type=\"hidden\"],#attr-value input[type='radio']", + "expect": [ + "attr-value-input3", + "attr-value-input4", + "attr-value-input6", + "attr-value-input8", + "attr-value-input9" + ], + "level": 2 +}, +{ + "name": "Attribute value selector with unquoted value, matching multiple inputs with type attributes", + "selector": "#attr-value 
input[type=hidden],#attr-value input[type=radio]", + "expect": [ + "attr-value-input3", + "attr-value-input4", + "attr-value-input6", + "attr-value-input8", + "attr-value-input9" + ], + "level": 2 +}, +{ + "name": "Attribute value selector, matching attribute with value using non-ASCII characters", + "selector": "[data-attr-value=中文]", + "expect": [ + "attr-value-div5" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector, matching class attribute with value", + "selector": "#attr-whitespace [class~=\"div1\"]", + "expect": [ + "attr-whitespace-div1" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector, not matching class attribute with empty value", + "selector": "#attr-whitespace [class~=\"\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector, not matching class attribute with partial value", + "selector": "[data-attr-whitespace~=\"div\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector, matching custom data-* attribute with unicode escaped value", + "selector": "[data-attr-whitespace~=\"\\0000e9\"]", + "expect": [ + "attr-whitespace-div4" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector, matching custom data-* attribute with escaped character", + "selector": "[data-attr-whitespace_foo~=\"\\e9\"]", + "expect": [ + "attr-whitespace-div5" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector with single-quoted value, matching multiple links with rel attributes", + "selector": "#attr-whitespace a[rel~='bookmark'], #attr-whitespace a[rel~='nofollow']", + "expect": [ + "attr-whitespace-a1", + "attr-whitespace-a2", + "attr-whitespace-a3", + "attr-whitespace-a5", + "attr-whitespace-a7" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector with double-quoted value, matching multiple links with rel attributes", + "selector": "#attr-whitespace 
a[rel~=\"bookmark\"],#attr-whitespace a[rel~='nofollow']", + "expect": [ + "attr-whitespace-a1", + "attr-whitespace-a2", + "attr-whitespace-a3", + "attr-whitespace-a5", + "attr-whitespace-a7" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector with unquoted value, matching multiple links with rel attributes", + "selector": "#attr-whitespace a[rel~=bookmark], #attr-whitespace a[rel~=nofollow]", + "expect": [ + "attr-whitespace-a1", + "attr-whitespace-a2", + "attr-whitespace-a3", + "attr-whitespace-a5", + "attr-whitespace-a7" + ], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector with double-quoted value, not matching value with space", + "selector": "#attr-whitespace a[rel~=\"book mark\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute whitespace-separated list selector, matching title attribute with value using non-ASCII characters", + "selector": "#attr-whitespace [title~=中文]", + "expect": [ + "attr-whitespace-p1" + ], + "level": 2 +}, +{ + "name": "Attribute hyphen-separated list selector, not matching unspecified lang attribute", + "selector": "#attr-hyphen-div1[lang|=\"en\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute hyphen-separated list selector, matching lang attribute with exact value", + "selector": "#attr-hyphen-div2[lang|=\"fr\"]", + "expect": [ + "attr-hyphen-div2" + ], + "level": 2 +}, +{ + "name": "Attribute hyphen-separated list selector, matching lang attribute with partial value", + "selector": "#attr-hyphen-div3[lang|=\"en\"]", + "expect": [ + "attr-hyphen-div3" + ], + "level": 2 +}, +{ + "name": "Attribute hyphen-separated list selector, not matching incorrect value", + "selector": "#attr-hyphen-div4[lang|=\"es-AR\"]", + "expect": [], + "level": 2 +}, +{ + "name": "Attribute begins with selector, matching href attributes beginning with specified substring", + "selector": "#attr-begins a[href^=\"http://www\"]", + "expect": [ + "attr-begins-a1", + "attr-begins-a3" + 
], + "level": 3 +}, +{ + "name": "Attribute begins with selector, matching lang attributes beginning with specified substring, ", + "selector": "#attr-begins [lang^=\"en-\"]", + "expect": [ + "attr-begins-div2", + "attr-begins-div4" + ], + "level": 3 +}, +{ + "name": "Attribute begins with selector, not matching class attribute not beginning with specified substring", + "selector": "#attr-begins [class^=apple]", + "expect": [], + "level": 3 +}, +{ + "name": "Attribute begins with selector with single-quoted value, matching class attribute beginning with specified substring", + "selector": "#attr-begins [class^=' apple']", + "expect": [ + "attr-begins-p1" + ], + "level": 3 +}, +{ + "name": "Attribute begins with selector with double-quoted value, matching class attribute beginning with specified substring", + "selector": "#attr-begins [class^=\" apple\"]", + "expect": [ + "attr-begins-p1" + ], + "level": 3 +}, +{ + "name": "Attribute begins with selector with unquoted value, not matching class attribute not beginning with specified substring", + "selector": "#attr-begins [class^= apple]", + "expect": [], + "level": 3 +}, +{ + "name": "Attribute ends with selector, matching href attributes ending with specified substring", + "selector": "#attr-ends a[href$=\".org\"]", + "expect": [ + "attr-ends-a1", + "attr-ends-a3" + ], + "level": 3 +}, +{ + "name": "Attribute ends with selector, matching lang attributes ending with specified substring, ", + "selector": "#attr-ends [lang$=\"-CH\"]", + "expect": [ + "attr-ends-div2", + "attr-ends-div4" + ], + "level": 3 +}, +{ + "name": "Attribute ends with selector, not matching class attribute not ending with specified substring", + "selector": "#attr-ends [class$=apple]", + "expect": [], + "level": 3 +}, +{ + "name": "Attribute ends with selector with single-quoted value, matching class attribute ending with specified substring", + "selector": "#attr-ends [class$='apple ']", + "expect": [ + "attr-ends-p1" + ], + "level": 3 +}, +{ 
+ "name": "Attribute ends with selector with double-quoted value, matching class attribute ending with specified substring", + "selector": "#attr-ends [class$=\"apple \"]", + "expect": [ + "attr-ends-p1" + ], + "level": 3 +}, +{ + "name": "Attribute ends with selector with unquoted value, not matching class attribute not ending with specified substring", + "selector": "#attr-ends [class$=apple ]", + "expect": [], + "level": 3 +}, +{ + "name": "Attribute contains selector, matching href attributes beginning with specified substring", + "selector": "#attr-contains a[href*=\"http://www\"]", + "expect": [ + "attr-contains-a1", + "attr-contains-a3" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector, matching href attributes ending with specified substring", + "selector": "#attr-contains a[href*=\".org\"]", + "expect": [ + "attr-contains-a1", + "attr-contains-a2" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector, matching href attributes containing specified substring", + "selector": "#attr-contains a[href*=\".example.\"]", + "expect": [ + "attr-contains-a1", + "attr-contains-a3" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector, matching lang attributes beginning with specified substring, ", + "selector": "#attr-contains [lang*=\"en-\"]", + "expect": [ + "attr-contains-div2", + "attr-contains-div6" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector, matching lang attributes ending with specified substring, ", + "selector": "#attr-contains [lang*=\"-CH\"]", + "expect": [ + "attr-contains-div3", + "attr-contains-div5" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with single-quoted value, matching class attribute beginning with specified substring", + "selector": "#attr-contains [class*=' apple']", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with single-quoted value, matching class attribute ending with specified substring", + "selector": 
"#attr-contains [class*='orange ']", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with single-quoted value, matching class attribute containing specified substring", + "selector": "#attr-contains [class*='ple banana ora']", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with double-quoted value, matching class attribute beginning with specified substring", + "selector": "#attr-contains [class*=\" apple\"]", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with double-quoted value, matching class attribute ending with specified substring", + "selector": "#attr-contains [class*=\"orange \"]", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with double-quoted value, matching class attribute containing specified substring", + "selector": "#attr-contains [class*=\"ple banana ora\"]", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with unquoted value, matching class attribute beginning with specified substring", + "selector": "#attr-contains [class*= apple]", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with unquoted value, matching class attribute ending with specified substring", + "selector": "#attr-contains [class*=orange ]", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": "Attribute contains selector with unquoted value, matching class attribute containing specified substring", + "selector": "#attr-contains [class*= banana ]", + "expect": [ + "attr-contains-p1" + ], + "level": 3 +}, +{ + "name": ":root pseudo-class selector, matching document root element", + "selector": ":root", + "expect": [ + "html" + ], + "exclude": [ + "element", + "fragment", + "detached" + ], + "level": 3 +}, +{ + "name": ":root pseudo-class selector, not matching 
document root element", + "selector": ":root", + "expect": [], + "exclude": [ + "document" + ], + "level": 3 +}, +{ + "name": ":nth-child selector, matching the third child element", + "selector": "#pseudo-nth-table1 :nth-child(3)", + "expect": [ + "pseudo-nth-td3", + "pseudo-nth-td9", + "pseudo-nth-tr3", + "pseudo-nth-td15" + ], + "level": 3 +}, +{ + "name": ":nth-child selector, matching every third child element", + "selector": "#pseudo-nth li:nth-child(3n)", + "expect": [ + "pseudo-nth-li3", + "pseudo-nth-li6", + "pseudo-nth-li9", + "pseudo-nth-li12" + ], + "level": 3 +}, +{ + "name": ":nth-child selector, matching every second child element, starting from the fourth", + "selector": "#pseudo-nth li:nth-child(2n+4)", + "expect": [ + "pseudo-nth-li4", + "pseudo-nth-li6", + "pseudo-nth-li8", + "pseudo-nth-li10", + "pseudo-nth-li12" + ], + "level": 3 +}, +{ + "name": ":nth-child selector, matching every fourth child element, starting from the third", + "selector": "#pseudo-nth-p1 :nth-child(4n-1)", + "expect": [ + "pseudo-nth-em2", + "pseudo-nth-span3" + ], + "level": 3 +}, +{ + "name": ":nth-last-child selector, matching the third last child element", + "selector": "#pseudo-nth-table1 :nth-last-child(3)", + "expect": [ + "pseudo-nth-tr1", + "pseudo-nth-td4", + "pseudo-nth-td10", + "pseudo-nth-td16" + ], + "level": 3 +}, +{ + "name": ":nth-last-child selector, matching every third child element from the end", + "selector": "#pseudo-nth li:nth-last-child(3n)", + "expect": [ + "pseudo-nth-li1", + "pseudo-nth-li4", + "pseudo-nth-li7", + "pseudo-nth-li10" + ], + "level": 3 +}, +{ + "name": ":nth-last-child selector, matching every second child element from the end, starting from the fourth last", + "selector": "#pseudo-nth li:nth-last-child(2n+4)", + "expect": [ + "pseudo-nth-li1", + "pseudo-nth-li3", + "pseudo-nth-li5", + "pseudo-nth-li7", + "pseudo-nth-li9" + ], + "level": 3 +}, +{ + "name": ":nth-last-child selector, matching every fourth element from the end, 
starting from the third last", + "selector": "#pseudo-nth-p1 :nth-last-child(4n-1)", + "expect": [ + "pseudo-nth-span2", + "pseudo-nth-span4" + ], + "level": 3 +}, +{ + "name": ":nth-of-type selector, matching the third em element", + "selector": "#pseudo-nth-p1 em:nth-of-type(3)", + "expect": [ + "pseudo-nth-em3" + ], + "level": 3 +}, +{ + "name": ":nth-of-type selector, matching every second element of their type", + "selector": "#pseudo-nth-p1 :nth-of-type(2n)", + "expect": [ + "pseudo-nth-em2", + "pseudo-nth-span2", + "pseudo-nth-span4", + "pseudo-nth-strong2", + "pseudo-nth-em4" + ], + "level": 3 +}, +{ + "name": ":nth-of-type selector, matching every second element of their type, starting from the first", + "selector": "#pseudo-nth-p1 span:nth-of-type(2n-1)", + "expect": [ + "pseudo-nth-span1", + "pseudo-nth-span3" + ], + "level": 3 +}, +{ + "name": ":nth-last-of-type selector, matching the third last em element", + "selector": "#pseudo-nth-p1 em:nth-last-of-type(3)", + "expect": [ + "pseudo-nth-em2" + ], + "level": 3 +}, +{ + "name": ":nth-last-of-type selector, matching every second last element of their type", + "selector": "#pseudo-nth-p1 :nth-last-of-type(2n)", + "expect": [ + "pseudo-nth-span1", + "pseudo-nth-em1", + "pseudo-nth-strong1", + "pseudo-nth-em3", + "pseudo-nth-span3" + ], + "level": 3 +}, +{ + "name": ":nth-last-of-type selector, matching every second last element of their type, starting from the last", + "selector": "#pseudo-nth-p1 span:nth-last-of-type(2n-1)", + "expect": [ + "pseudo-nth-span2", + "pseudo-nth-span4" + ], + "level": 3 +}, +{ + "name": ":first-of-type selector, matching the first em element", + "selector": "#pseudo-nth-p1 em:first-of-type", + "expect": [ + "pseudo-nth-em1" + ], + "level": 3 +}, +{ + "name": ":first-of-type selector, matching the first of every type of element", + "selector": "#pseudo-nth-p1 :first-of-type", + "expect": [ + "pseudo-nth-span1", + "pseudo-nth-em1", + "pseudo-nth-strong1" + ], + "level": 3 +}, 
+{ + "name": ":first-of-type selector, matching the first td element in each table row", + "selector": "#pseudo-nth-table1 tr :first-of-type", + "expect": [ + "pseudo-nth-td1", + "pseudo-nth-td7", + "pseudo-nth-td13" + ], + "level": 3 +}, +{ + "name": ":last-of-type selector, matching the last em element", + "selector": "#pseudo-nth-p1 em:last-of-type", + "expect": [ + "pseudo-nth-em4" + ], + "level": 3 +}, +{ + "name": ":last-of-type selector, matching the last of every type of element", + "selector": "#pseudo-nth-p1 :last-of-type", + "expect": [ + "pseudo-nth-span4", + "pseudo-nth-strong2", + "pseudo-nth-em4" + ], + "level": 3 +}, +{ + "name": ":last-of-type selector, matching the last td element in each table row", + "selector": "#pseudo-nth-table1 tr :last-of-type", + "expect": [ + "pseudo-nth-td6", + "pseudo-nth-td12", + "pseudo-nth-td18" + ], + "level": 3 +}, +{ + "name": ":first-child pseudo-class selector, matching first child div element", + "selector": "#pseudo-first-child div:first-child", + "expect": [ + "pseudo-first-child-div1" + ], + "level": 2 +}, +{ + "name": ":first-child pseudo-class selector, doesn't match non-first-child elements", + "selector": ".pseudo-first-child-div2:first-child, .pseudo-first-child-div3:first-child", + "expect": [], + "level": 2 +}, +{ + "name": ":first-child pseudo-class selector, matching first-child of multiple elements", + "selector": "#pseudo-first-child span:first-child", + "expect": [ + "pseudo-first-child-span1", + "pseudo-first-child-span3", + "pseudo-first-child-span5" + ], + "level": 2 +}, +{ + "name": ":last-child pseudo-class selector, matching last child div element", + "selector": "#pseudo-last-child div:last-child", + "expect": [ + "pseudo-last-child-div3" + ], + "level": 3 +}, +{ + "name": ":last-child pseudo-class selector, doesn't match non-last-child elements", + "selector": ".pseudo-last-child-div1:last-child, .pseudo-last-child-div2:first-child", + "expect": [], + "level": 3 +}, +{ + "name": 
":last-child pseudo-class selector, matching first-child of multiple elements", + "selector": "#pseudo-last-child span:last-child", + "expect": [ + "pseudo-last-child-span2", + "pseudo-last-child-span4", + "pseudo-last-child-span6" + ], + "level": 3 +}, +{ + "name": ":pseudo-only-child pseudo-class selector, matching all only-child elements", + "selector": "#pseudo-only :only-child", + "expect": [ + "pseudo-only-span1" + ], + "level": 3 +}, +{ + "name": ":pseudo-only-child pseudo-class selector, matching only-child em elements", + "selector": "#pseudo-only em:only-child", + "expect": [], + "level": 3 +}, +{ + "name": ":pseudo-only-of-type pseudo-class selector, matching all elements with no siblings of the same type", + "selector": "#pseudo-only :only-of-type", + "expect": [ + "pseudo-only-span1", + "pseudo-only-em1" + ], + "level": 3 +}, +{ + "name": ":pseudo-only-of-type pseudo-class selector, matching em elements with no siblings of the same type", + "selector": "#pseudo-only em:only-of-type", + "expect": [ + "pseudo-only-em1" + ], + "level": 3 +}, +{ + "name": ":empty pseudo-class selector, matching empty p elements", + "selector": "#pseudo-empty p:empty", + "expect": [ + "pseudo-empty-p1", + "pseudo-empty-p2" + ], + "level": 3 +}, +{ + "name": ":empty pseudo-class selector, matching all empty elements", + "selector": "#pseudo-empty :empty", + "expect": [ + "pseudo-empty-p1", + "pseudo-empty-p2", + "pseudo-empty-span1" + ], + "level": 3 +}, +{ + "name": ":link and :visited pseudo-class selectors, matching a and area elements with href attributes", + "selector": "#pseudo-link :link, #pseudo-link :visited", + "expect": [ + "pseudo-link-a1", + "pseudo-link-a2", + "pseudo-link-area1" + ], + "level": 1 +}, +{ + "name": ":link and :visited pseudo-class selectors, matching link elements with href attributes", + "selector": "#head :link, #head :visited", + "expect": [ + "pseudo-link-link1", + "pseudo-link-link2" + ], + "exclude": [ + "element", + "fragment", + 
"detached" + ], + "level": 1 +}, +{ + "name": ":link and :visited pseudo-class selectors, not matching link elements with href attributes", + "selector": "#head :link, #head :visited", + "expect": [], + "exclude": [ + "document" + ], + "level": 1 +}, +{ + "name": ":link and :visited pseudo-class selectors, chained, mutually exclusive pseudo-classes match nothing", + "selector": ":link:visited", + "expect": [], + "exclude": [ + "document" + ], + "level": 1 +}, +{ + "name": ":target pseudo-class selector, matching the element referenced by the URL fragment identifier", + "selector": ":target", + "expect": [], + "exclude": [ + "document", + "element" + ], + "level": 3 +}, +{ + "name": ":target pseudo-class selector, matching the element referenced by the URL fragment identifier", + "selector": ":target", + "xfail": true, + "expect": [ + "target" + ], + "exclude": [ + "fragment", + "detached" + ], + "level": 3 +}, +{ + "name": ":lang pseudo-class selector, matching inherited language", + "selector": "#pseudo-lang-div1:lang(en)", + "expect": [ + "pseudo-lang-div1" + ], + "exclude": [ + "detached", + "fragment" + ], + "level": 2 +}, +{ + "name": ":lang pseudo-class selector, not matching element with no inherited language", + "selector": "#pseudo-lang-div1:lang(en)", + "expect": [], + "exclude": [ + "document", + "element" + ], + "level": 2 +}, +{ + "name": ":lang pseudo-class selector, matching specified language with exact value", + "selector": "#pseudo-lang-div2:lang(fr)", + "expect": [ + "pseudo-lang-div2" + ], + "level": 2 +}, +{ + "name": ":lang pseudo-class selector, matching specified language with partial value", + "selector": "#pseudo-lang-div3:lang(en)", + "expect": [ + "pseudo-lang-div3" + ], + "level": 2 +}, +{ + "name": ":lang pseudo-class selector, not matching incorrect language", + "selector": "#pseudo-lang-div4:lang(es-AR)", + "expect": [], + "level": 2 +}, +{ + "name": ":enabled pseudo-class selector, matching all enabled form controls", + "selector": 
"#pseudo-ui :enabled", + "expect": [ + "pseudo-ui-input1", + "pseudo-ui-input2", + "pseudo-ui-input3", + "pseudo-ui-input4", + "pseudo-ui-input5", + "pseudo-ui-input6", + "pseudo-ui-input7", + "pseudo-ui-input8", + "pseudo-ui-input9", + "pseudo-ui-textarea1", + "pseudo-ui-button1" + ], + "level": 3 +}, +{ + "name": ":enabled pseudo-class selector, matching all disabled form controls", + "selector": "#pseudo-ui :disabled", + "expect": [ + "pseudo-ui-input10", + "pseudo-ui-input11", + "pseudo-ui-input12", + "pseudo-ui-input13", + "pseudo-ui-input14", + "pseudo-ui-input15", + "pseudo-ui-input16", + "pseudo-ui-input17", + "pseudo-ui-input18", + "pseudo-ui-textarea2", + "pseudo-ui-button2" + ], + "level": 3 +}, +{ + "name": ":checked pseudo-class selector, matching checked radio buttons and checkboxes", + "selector": "#pseudo-ui :checked", + "expect": [ + "pseudo-ui-input4", + "pseudo-ui-input6", + "pseudo-ui-input13", + "pseudo-ui-input15" + ], + "level": 3 +}, +{ + "name": ":not pseudo-class selector, matching ", + "selector": "#not>:not(div)", + "expect": [ + "not-p1", + "not-p2", + "not-p3" + ], + "level": 3 +}, +{ + "name": ":not pseudo-class selector, matching ", + "selector": "#not * :not(:first-child)", + "expect": [ + "not-em1", + "not-em2", + "not-em3" + ], + "level": 3 +}, +{ + "name": ":not pseudo-class selector, matching nothing", + "selector": ":not(*)", + "expect": [], + "level": 3 +}, +{ + "name": ":not pseudo-class selector, matching nothing", + "selector": ":not(*|*)", + "expect": [], + "level": 3 +}, +{ + "name": ":first-line pseudo-element (one-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element:first-line", + "expect": [], + "level": 2 +}, +{ + "name": "::first-line pseudo-element (two-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element::first-line", + "expect": [], + "level": 3 +}, +{ + "name": ":first-letter pseudo-element (one-colon syntax) selector, not matching any elements", + 
"selector": "#pseudo-element:first-letter", + "expect": [], + "level": 2 +}, +{ + "name": "::first-letter pseudo-element (two-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element::first-letter", + "expect": [], + "level": 3 +}, +{ + "name": ":before pseudo-element (one-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element:before", + "expect": [], + "level": 2 +}, +{ + "name": "::before pseudo-element (two-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element::before", + "expect": [], + "level": 3 +}, +{ + "name": ":after pseudo-element (one-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element:after", + "expect": [], + "level": 2 +}, +{ + "name": "::after pseudo-element (two-colon syntax) selector, not matching any elements", + "selector": "#pseudo-element::after", + "expect": [], + "level": 3 +}, +{ + "name": "Class selector, matching element with specified class", + "selector": ".class-p", + "expect": [ + "class-p1", + "class-p2", + "class-p3" + ], + "level": 1 +}, +{ + "name": "Class selector, chained, matching only elements with all specified classes", + "selector": "#class .apple.orange.banana", + "expect": [ + "class-div1", + "class-div2", + "class-p4", + "class-div3", + "class-p6", + "class-div4" + ], + "level": 1 +}, +{ + "name": "Class Selector, chained, with type selector", + "selector": "div.apple.banana.orange", + "expect": [ + "class-div1", + "class-div2", + "class-div3", + "class-div4" + ], + "level": 1 +}, +{ + "name": "Class selector, matching element with class value using non-ASCII characters", + "selector": ".台北Táiběi", + "expect": [ + "class-span1" + ], + "level": 1 +}, +{ + "name": "Class selector, matching multiple elements with class value using non-ASCII characters", + "selector": ".台北", + "expect": [ + "class-span1", + "class-span2" + ], + "level": 1 +}, +{ + "name": "Class selector, chained, matching element with multiple 
class values using non-ASCII characters", + "selector": ".台北Táiběi.台北", + "expect": [ + "class-span1" + ], + "level": 1 +}, +{ + "name": "Class selector, matching element with class with escaped character", + "selector": ".foo\\:bar", + "expect": [ + "class-span3" + ], + "level": 1 +}, +{ + "name": "Class selector, matching element with class with escaped character", + "selector": ".test\\.foo\\[5\\]bar", + "expect": [ + "class-span4" + ], + "level": 1 +}, +{ + "name": "ID selector, matching element with specified id", + "selector": "#id #id-div1", + "expect": [ + "id-div1" + ], + "level": 1 +}, +{ + "name": "ID selector, chained, matching element with specified id", + "selector": "#id-div1, #id-div1", + "expect": [ + "id-div1" + ], + "level": 1 +}, +{ + "name": "ID selector, chained, matching element with specified id", + "selector": "#id-div1, #id-div2", + "expect": [ + "id-div1", + "id-div2" + ], + "level": 1 +}, +{ + "name": "ID Selector, chained, with type selector", + "selector": "div#id-div1, div#id-div2", + "expect": [ + "id-div1", + "id-div2" + ], + "level": 1 +}, +{ + "name": "ID selector, not matching non-existent descendant", + "selector": "#id #none", + "expect": [], + "level": 1 +}, +{ + "name": "ID selector, not matching non-existent ancestor", + "selector": "#none #id-div1", + "expect": [], + "level": 1 +}, +{ + "name": "ID selector, matching multiple elements with duplicate id", + "selector": "#id-li-duplicate", + "expect": [ + "id-li-duplicate", + "id-li-duplicate", + "id-li-duplicate", + "id-li-duplicate" + ], + "level": 1 +}, +{ + "name": "ID selector, matching id value using non-ASCII characters", + "selector": "#台北Táiběi", + "expect": [ + "台北Táiběi" + ], + "level": 1 +}, +{ + "name": "ID selector, matching id value using non-ASCII characters", + "selector": "#台北", + "expect": [ + "台北" + ], + "level": 1 +}, +{ + "name": "ID selector, matching id values using non-ASCII characters", + "selector": "#台北Táiběi, #台北", + "expect": [ + "台北Táiběi", + 
"台北" + ], + "level": 1 +}, +{ + "name": "ID selector, matching element with id with escaped character", + "selector": "#\\#foo\\:bar", + "expect": [ + "#foo:bar" + ], + "level": 1 +}, +{ + "name": "ID selector, matching element with id with escaped character", + "selector": "#test\\.foo\\[5\\]bar", + "expect": [ + "test.foo[5]bar" + ], + "level": 1 +}, +{ + "name": "Namespace selector, matching element with any namespace", + "selector": "#any-namespace *|div", + "expect": [ + "any-namespace-div1", + "any-namespace-div2", + "any-namespace-div3", + "any-namespace-div4" + ], + "level": 3 +}, +{ + "name": "Namespace selector, matching div elements in no namespace only", + "selector": "#no-namespace |div", + "expect": [ + "no-namespace-div3" + ], + "level": 3 +}, +{ + "name": "Namespace selector, matching any elements in no namespace only", + "selector": "#no-namespace |*", + "expect": [ + "no-namespace-div3" + ], + "level": 3 +}, +{ + "name": "Descendant combinator, matching element that is a descendant of an element with id", + "selector": "#descendant div", + "expect": [ + "descendant-div1", + "descendant-div2", + "descendant-div3", + "descendant-div4" + ], + "level": 1 +}, +{ + "name": "Descendant combinator, matching element with id that is a descendant of an element", + "selector": "body #descendant-div1", + "expect": [ + "descendant-div1" + ], + "exclude": [ + "detached", + "fragment" + ], + "level": 1 +}, +{ + "name": "Descendant combinator, matching element with id that is a descendant of an element", + "selector": "div #descendant-div1", + "expect": [ + "descendant-div1" + ], + "level": 1 +}, +{ + "name": "Descendant combinator, matching element with id that is a descendant of an element with id", + "selector": "#descendant #descendant-div2", + "expect": [ + "descendant-div2" + ], + "level": 1 +}, +{ + "name": "Descendant combinator, matching element with class that is a descendant of an element with id", + "selector": "#descendant .descendant-div2", + 
"expect": [ + "descendant-div2" + ], + "level": 1 +}, +{ + "name": "Descendant combinator, matching element with class that is a descendant of an element with class", + "selector": ".descendant-div1 .descendant-div3", + "expect": [ + "descendant-div3" + ], + "level": 1 +}, +{ + "name": "Descendant combinator, not matching element with id that is not a descendant of an element with id", + "selector": "#descendant-div1 #descendant-div4", + "expect": [], + "level": 1 +}, +{ + "name": "Descendant combinator, whitespace characters", + "selector": "#descendant\t\r\n#descendant-div2", + "expect": [ + "descendant-div2" + ], + "level": 1 +}, +{ + "name": "Child combinator, matching element that is a child of an element with id", + "selector": "#child>div", + "expect": [ + "child-div1", + "child-div4" + ], + "level": 2 +}, +{ + "name": "Child combinator, matching element with id that is a child of an element", + "selector": "div>#child-div1", + "expect": [ + "child-div1" + ], + "level": 2 +}, +{ + "name": "Child combinator, matching element with id that is a child of an element with id", + "selector": "#child>#child-div1", + "expect": [ + "child-div1" + ], + "level": 2 +}, +{ + "name": "Child combinator, matching element with id that is a child of an element with class", + "selector": "#child-div1>.child-div2", + "expect": [ + "child-div2" + ], + "level": 2 +}, +{ + "name": "Child combinator, matching element with class that is a child of an element with class", + "selector": ".child-div1>.child-div2", + "expect": [ + "child-div2" + ], + "level": 2 +}, +{ + "name": "Child combinator, not matching element with id that is not a child of an element with id", + "selector": "#child>#child-div3", + "expect": [], + "level": 2 +}, +{ + "name": "Child combinator, not matching element with id that is not a child of an element with class", + "selector": "#child-div1>.child-div3", + "expect": [], + "level": 2 +}, +{ + "name": "Child combinator, not matching element with class that is 
not a child of an element with class", + "selector": ".child-div1>.child-div3", + "expect": [], + "level": 2 +}, +{ + "name": "Child combinator, surrounded by whitespace", + "selector": "#child-div1\t\r\n>\t\r\n#child-div2", + "expect": [ + "child-div2" + ], + "level": 2 +}, +{ + "name": "Child combinator, whitespace after", + "selector": "#child-div1>\t\r\n#child-div2", + "expect": [ + "child-div2" + ], + "level": 2 +}, +{ + "name": "Child combinator, whitespace before", + "selector": "#child-div1\t\r\n>#child-div2", + "expect": [ + "child-div2" + ], + "level": 2 +}, +{ + "name": "Child combinator, no whitespace", + "selector": "#child-div1>#child-div2", + "expect": [ + "child-div2" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, matching element that is an adjacent sibling of an element with id", + "selector": "#adjacent-div2+div", + "expect": [ + "adjacent-div4" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, matching element with id that is an adjacent sibling of an element", + "selector": "div+#adjacent-div4", + "expect": [ + "adjacent-div4" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, matching element with id that is an adjacent sibling of an element with id", + "selector": "#adjacent-div2+#adjacent-div4", + "expect": [ + "adjacent-div4" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, matching element with class that is an adjacent sibling of an element with id", + "selector": "#adjacent-div2+.adjacent-div4", + "expect": [ + "adjacent-div4" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, matching element with class that is an adjacent sibling of an element with class", + "selector": ".adjacent-div2+.adjacent-div4", + "expect": [ + "adjacent-div4" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, matching p element that is an adjacent sibling of a div element", + "selector": "#adjacent div+p", + "expect": [ + "adjacent-p2" + ], + "level": 2 +}, +{ + "name": 
"Adjacent sibling combinator, not matching element with id that is not an adjacent sibling of an element with id", + "selector": "#adjacent-div2+#adjacent-p2, #adjacent-div2+#adjacent-div1", + "expect": [], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, surrounded by whitespace", + "selector": "#adjacent-p2\t\r\n+\t\r\n#adjacent-p3", + "expect": [ + "adjacent-p3" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, whitespace after", + "selector": "#adjacent-p2+\t\r\n#adjacent-p3", + "expect": [ + "adjacent-p3" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, whitespace before", + "selector": "#adjacent-p2\t\r\n+#adjacent-p3", + "expect": [ + "adjacent-p3" + ], + "level": 2 +}, +{ + "name": "Adjacent sibling combinator, no whitespace", + "selector": "#adjacent-p2+#adjacent-p3", + "expect": [ + "adjacent-p3" + ], + "level": 2 +}, +{ + "name": "General sibling combinator, matching element that is a sibling of an element with id", + "selector": "#sibling-div2~div", + "expect": [ + "sibling-div4", + "sibling-div6" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, matching element with id that is a sibling of an element", + "selector": "div~#sibling-div4", + "expect": [ + "sibling-div4" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, matching element with id that is a sibling of an element with id", + "selector": "#sibling-div2~#sibling-div4", + "expect": [ + "sibling-div4" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, matching element with class that is a sibling of an element with id", + "selector": "#sibling-div2~.sibling-div", + "expect": [ + "sibling-div4", + "sibling-div6" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, matching p element that is a sibling of a div element", + "selector": "#sibling div~p", + "expect": [ + "sibling-p2", + "sibling-p3" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, not matching element with id that is not a 
sibling after a p element", + "selector": "#sibling>p~div", + "expect": [], + "level": 3 +}, +{ + "name": "General sibling combinator, not matching element with id that is not a sibling after an element with id", + "selector": "#sibling-div2~#sibling-div3, #sibling-div2~#sibling-div1", + "expect": [], + "level": 3 +}, +{ + "name": "General sibling combinator, surrounded by whitespace", + "selector": "#sibling-p2\t\r\n~\t\r\n#sibling-p3", + "expect": [ + "sibling-p3" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, whitespace after", + "selector": "#sibling-p2~\t\r\n#sibling-p3", + "expect": [ + "sibling-p3" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, whitespace before", + "selector": "#sibling-p2\t\r\n~#sibling-p3", + "expect": [ + "sibling-p3" + ], + "level": 3 +}, +{ + "name": "General sibling combinator, no whitespace", + "selector": "#sibling-p2~#sibling-p3", + "expect": [ + "sibling-p3" + ], + "level": 3 +}, +{ + "name": "Syntax, group of selectors separator, surrounded by whitespace", + "selector": "#group em\t\r \n,\t\r \n#group strong", + "expect": [ + "group-em1", + "group-strong1" + ], + "level": 1 +}, +{ + "name": "Syntax, group of selectors separator, whitespace after", + "selector": "#group em,\t\r\n#group strong", + "expect": [ + "group-em1", + "group-strong1" + ], + "level": 1 +}, +{ + "name": "Syntax, group of selectors separator, whitespace before", + "selector": "#group em\t\r\n,#group strong", + "expect": [ + "group-em1", + "group-strong1" + ], + "level": 1 +}, +{ + "name": "Syntax, group of selectors separator, no whitespace", + "selector": "#group em,#group strong", + "expect": [ + "group-em1", + "group-strong1" + ], + "level": 1 +} +] diff --git a/cssselect2/tree.py b/cssselect2/tree.py new file mode 100644 index 0000000..34f4830 --- /dev/null +++ b/cssselect2/tree.py @@ -0,0 +1,366 @@ +# coding: utf8 + +from __future__ import unicode_literals + +import xml.etree.ElementTree as etree + +from webencodings 
import ascii_lower + +from ._compat import basestring, ifilter +from .compiler import compile_selector_list, split_whitespace + + +class cached_property(object): + # Borrowed from Werkzeug + # https://github.com/mitsuhiko/werkzeug/blob/master/werkzeug/utils.py + + def __init__(self, func, name=None, doc=None): + self.__name__ = name or func.__name__ + self.__module__ = func.__module__ + self.__doc__ = doc or func.__doc__ + self.func = func + + def __get__(self, obj, type=None, __missing=object()): + if obj is None: + return self + value = obj.__dict__.get(self.__name__, __missing) + if value is __missing: + value = self.func(obj) + obj.__dict__[self.__name__] = value + return value + + +class ElementWrapper(object): + """ + A wrapper for an ElementTree :class:`~xml.etree.ElementTree.Element` + for Selector matching. + + This class should not be instantiated directly. + :meth:`from_xml_root` or :meth:`from_html_root` should be used + for the root element of a document, + and other elements should be accessed (and wrappers generated) + using methods such as :meth:`iter_children` and :meth:`iter_subtree`. + + :class:`ElementWrapper` objects compare equal + if their underlying :class:`~xml.etree.ElementTree.Element` do. + + """ + @classmethod + def from_xml_root(cls, root, content_language=None): + """Wrap for selector matching the root of an XML or XHTML document. + + :param root: + An ElementTree :class:`~xml.etree.ElementTree.Element` + for the root element of a document. + If the given element is not the root, + selector matching will behave as if it were. + In other words, selectors will be `scope-contained`_ + to the subtree rooted at that element. + :returns: + A new :class:`ElementWrapper` + + ..
_scope-contained: + http://dev.w3.org/csswg/selectors4/#scope-contained-selectors + + """ + return cls._from_root(root, content_language, in_html_document=False) + + @classmethod + def from_html_root(cls, root, content_language=None): + """Same as :meth:`from_xml_root`, + but for documents parsed with an HTML parser + like `html5lib <https://html5lib.readthedocs.io/>`_, + which should be the case of documents with the ``text/html`` MIME type. + + Compared to :meth:`from_xml_root`, + this makes element attribute names in Selectors case-insensitive. + + """ + return cls._from_root(root, content_language, in_html_document=True) + + @classmethod + def _from_root(cls, root, content_language, in_html_document=True): + if hasattr(root, 'getroot'): + root = root.getroot() + return cls(root, parent=None, index=0, previous=None, + in_html_document=in_html_document, + content_language=content_language) + + def __init__(self, etree_element, parent, index, previous, + in_html_document, content_language=None): + #: The underlying ElementTree :class:`~xml.etree.ElementTree.Element` + self.etree_element = etree_element + #: The parent :class:`ElementWrapper`, + #: or :obj:`None` for the root element. + self.parent = parent + #: The previous sibling :class:`ElementWrapper`, + #: or :obj:`None` for the root element or a first child. + self.previous = previous + if parent is not None: + #: The :attr:`parent`’s children + #: as a list of + #: ElementTree :class:`~xml.etree.ElementTree.Element`s. + #: For the root (which has no parent), a list of just the root. + self.etree_siblings = parent.etree_children + else: + self.etree_siblings = [etree_element] + #: The position within the :attr:`parent`’s children, counting from 0. + #: ``e.etree_siblings[e.index]`` is always ``e.etree_element``.
+ self.index = index + self.in_html_document = in_html_document + self.transport_content_language = content_language + + def __eq__(self, other): + return (type(self) == type(other) and + self.etree_element == other.etree_element) + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash((type(self), self.etree_element)) + + def __iter__(self): + for element in self.iter_children(): + yield element + + def iter_ancestors(self): + """Return an iterator of existing :class:`ElementWrapper` objects + for this element’s ancestors, + in reversed tree order (from :attr:`parent` to the root). + + The element itself is not included, + this is an empty sequence for the root element. + + """ + element = self + while element.parent is not None: + element = element.parent + yield element + + def iter_previous_siblings(self): + """Return an iterator of existing :class:`ElementWrapper` objects + for this element’s previous siblings, + in reversed tree order. + + The element itself is not included, + this is an empty sequence for a first child or the root element. + + """ + element = self + while element.previous is not None: + element = element.previous + yield element + + def iter_children(self): + """Return an iterator of newly-created :class:`ElementWrapper` objects + for this element’s child elements, + in tree order. + + """ + child = None + for i, etree_child in enumerate(self.etree_children): + child = type(self)( + etree_child, + parent=self, + index=i, + previous=child, + in_html_document=self.in_html_document, + ) + yield child + + def iter_subtree(self): + """Return an iterator of newly-created :class:`ElementWrapper` objects + for the entire subtree rooted at this element, + in tree order. + + Unlike in other methods, the element itself *is* included. + + This loops over an entire document: + + .. code-block:: python + + for element in ElementWrapper.from_xml_root(root).iter_subtree(): + ...
+ + """ + stack = [iter([self])] + while stack: + element = next(stack[-1], None) + if element is None: + stack.pop() + else: + yield element + stack.append(element.iter_children()) + + @staticmethod + def _compile(selectors): + return [ + compiled_selector.test + for selector in selectors + for compiled_selector in ( + [selector] if hasattr(selector, 'test') + else compile_selector_list(selector) + ) + if compiled_selector.pseudo_element is None and + not compiled_selector.never_matches + ] + + def matches(self, *selectors): + """Return whether this element matches any of the given selectors. + + :param selectors: + Each given selector is either a :class:`CompiledSelector`, + or an argument to :func:`compile_selector_list`. + + """ + return any(test(self) for test in self._compile(selectors)) + + def query_all(self, *selectors): + """ + Return elements, in tree order, that match any of the given selectors. + + Selectors are `scope-filtered`_ to the subtree rooted at this element. + + .. _scope-filtered: http://dev.w3.org/csswg/selectors4/#scope-filtered + + :param selectors: + Each given selector is either a :class:`CompiledSelector`, + or an argument to :func:`compile_selector_list`. + :returns: + An iterator of newly-created :class:`ElementWrapper` objects. + + """ + tests = self._compile(selectors) + if len(tests) == 1: + return ifilter(tests[0], self.iter_subtree()) + elif selectors: + return ( + element + for element in self.iter_subtree() + if any(test(element) for test in tests) + ) + else: + return iter(()) + + def query(self, *selectors): + """Return the first element (in tree order) + that matches any of the given selectors. + + :param selectors: + Each given selector is either a :class:`CompiledSelector`, + or an argument to :func:`compile_selector_list`. + :returns: + A newly-created :class:`ElementWrapper` object, + or :obj:`None` if there is no match.
+ + """ + return next(self.query_all(*selectors), None) + + @cached_property + def etree_children(self): + """This element’s children, + as a list of ElementTree :class:`~xml.etree.ElementTree.Element`. + + Other ElementTree nodes such as + :class:`comments <~xml.etree.ElementTree.Comment>` and + :class:`processing instructions + <~xml.etree.ElementTree.ProcessingInstruction>` + are not included. + + """ + return [c for c in self.etree_element if isinstance(c.tag, basestring)] + + @cached_property + def local_name(self): + """The local name of this element, as a string.""" + namespace_url, local_name = _split_etree_tag(self.etree_element.tag) + self.__dict__[str('namespace_url')] = namespace_url + return local_name + + @cached_property + def namespace_url(self): + """The namespace URL of this element, as a string.""" + namespace_url, local_name = _split_etree_tag(self.etree_element.tag) + self.__dict__[str('local_name')] = local_name + return namespace_url + + @cached_property + def id(self): + """The ID of this element, as a string.""" + return self.etree_element.get('id') + + @cached_property + def classes(self): + """The classes of this element, as a :class:`set` of strings.""" + return set(split_whitespace(self.etree_element.get('class', ''))) + + @cached_property + def lang(self): + """The language of this element, as a string.""" + # http://whatwg.org/C#language + xml_lang = self.etree_element.get( + '{http://www.w3.org/XML/1998/namespace}lang') + if xml_lang is not None: + return ascii_lower(xml_lang) + is_html = ( + self.in_html_document or + self.namespace_url == 'http://www.w3.org/1999/xhtml') + if is_html: + lang = self.etree_element.get('lang') + if lang is not None: + return ascii_lower(lang) + if self.parent is not None: + return self.parent.lang + # Root element + if is_html: + content_language = None + for meta in etree_iter(self.etree_element, + '{http://www.w3.org/1999/xhtml}meta'): + http_equiv = meta.get('http-equiv', '') + if
ascii_lower(http_equiv) == 'content-language': + content_language = _parse_content_language( + meta.get('content')) + if content_language is not None: + return ascii_lower(content_language) + # Empty string means unknown + return _parse_content_language(self.transport_content_language) or '' + + @cached_property + def in_disabled_fieldset(self): + if self.parent is None: + return False + if (self.parent.etree_element.tag == ( + '{http://www.w3.org/1999/xhtml}fieldset') and + self.parent.etree_element.get('disabled') is not None and ( + self.etree_element.tag != ( + '{http://www.w3.org/1999/xhtml}legend') or + any(s.etree_element.tag == ( + '{http://www.w3.org/1999/xhtml}legend') + for s in self.iter_previous_siblings()))): + return True + return self.parent.in_disabled_fieldset + + +def _split_etree_tag(tag): + pos = tag.rfind('}') + if pos == -1: + return '', tag + else: + assert tag[0] == '{' + return tag[1:pos], tag[pos + 1:] + + +if hasattr(etree.Element, 'iter'): + def etree_iter(element, tag=None): + return element.iter(tag) +else: + def etree_iter(element, tag=None): + return element.getiterator(tag) + + +def _parse_content_language(value): + if value is not None and ',' not in value: + parts = split_whitespace(value) + if len(parts) == 1: + return parts[0] diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..614af51 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# cssselect2 documentation build configuration file. + +import codecs +import re +from os import path + + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. 
+master_doc = 'index' + +# General information about the project. +project = 'cssselect2' +copyright = '2012-2017, Simon Sapin' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. +release = re.search("VERSION = '([^']+)'", codecs.open( + path.join(path.dirname(path.dirname(__file__)), 'cssselect2', '__init__.py'), + encoding='utf-8', +).read().strip()).group(1) + +# The short X.Y version. +version = '.'.join(release.split('.')[:2]) + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Output file base name for HTML help builder. +htmlhelp_basename = 'cssselect2doc' + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'cssselect2', 'cssselect2 Documentation', + ['Simon Sapin'], 1) +] + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'cssselect2', 'cssselect2 Documentation', + 'Simon Sapin', 'cssselect2', 'One line description of project.', + 'Miscellaneous'), +] + +# Example configuration for intersphinx: refer to the Python standard library. 
+intersphinx_mapping = { + 'py2': ('http://docs.python.org/2', None), + 'py3': ('http://docs.python.org/3', None), + 'webencodings': ('http://pythonhosted.org/webencodings/', None)} diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..4da6c0f --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,41 @@ +.. include:: ../README.rst + + +Installation +============ + +Installing cssselect2 with pip_ should Just Work:: + + pip install cssselect2 + +This will also automatically install cssselect2’s only dependency, tinycss2_. +cssselect2 and tinycss2 both only contain Python code and should work on any +Python implementation, although they’re only tested on CPython. + +.. _pip: http://pip-installer.org/ +.. _tinycss2: http://tinycss2.readthedocs.io/ + + +Basic Example +============= + +Here is a typical cssselect2 workflow: + +- parse a CSS stylesheet using tinycss2_, +- store the CSS rules in a :class:`~cssselect2.Matcher` object, +- parse an HTML document using an ElementTree-like parser, +- wrap the HTML tree in a :class:`~cssselect2.ElementWrapper` object, +- find the CSS rules matching each HTML tag, using the matcher and the wrapper. + +.. literalinclude:: ../example.py + +.. module:: cssselect2 +.. autoclass:: Matcher + :members: +.. autofunction:: compile_selector_list +.. autoclass:: ElementWrapper + :members: +.. autoclass:: SelectorError + + +..
include:: ../CHANGES diff --git a/example.py b/example.py new file mode 100644 index 0000000..149822e --- /dev/null +++ b/example.py @@ -0,0 +1,49 @@ +from xml.etree import ElementTree + +import cssselect2 +import tinycss2 + + +# Parse CSS and add rules to the matcher + +matcher = cssselect2.Matcher() + +rules = tinycss2.parse_stylesheet(''' + body { font-size: 2em } + body p { background: red } + p { color: blue } +''', skip_whitespace=True) + +for rule in rules: + selectors = cssselect2.compile_selector_list(rule.prelude) + selector_string = tinycss2.serialize(rule.prelude) + content_string = tinycss2.serialize(rule.content) + payload = (selector_string, content_string) + for selector in selectors: + matcher.add_selector(selector, payload) + + +# Parse HTML and find CSS rules applying to each tag + +html_tree = ElementTree.fromstring(''' + <html> + <body> + <p>Test</p> + </body> + </html> +''') +wrapper = cssselect2.ElementWrapper.from_html_root(html_tree) +for element in wrapper.iter_subtree(): + tag = element.etree_element.tag.split('}')[-1] + print('Found tag "{}" in HTML'.format(tag)) + + matches = matcher.match(element) + if matches: + for match in matches: + specificity, order, pseudo, payload = match + selector_string, content_string = payload + print('Matching selector "{}" ({})'.format( + selector_string, content_string)) + else: + print('No rule matching this tag') + print() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..99b7654 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,14 @@ +[aliases] +test = pytest + +[bdist_wheel] +universal = 1 + +[tool:pytest] +addopts = --cov=cssselect2 --flake8 --isort cssselect2 +norecursedirs = dist .cache .git build *.egg-info .eggs venv + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..bdf7ac4 --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# coding: utf8 + +import os.path +import re +import sys + +from setuptools import setup + +ROOT = os.path.dirname(__file__) +README = open(os.path.join(ROOT, 'README.rst')).read() +INIT_PY = open(os.path.join(ROOT, 'cssselect2', '__init__.py')).read() +VERSION = re.search("VERSION = '([^']+)'", INIT_PY).group(1) + +needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv) +pytest_runner = ['pytest-runner'] if needs_pytest else [] + +setup( + name='cssselect2', + version=VERSION, + author='Simon Sapin', + author_email='simon.sapin@exyr.org', + description='CSS selectors for Python ElementTree', + long_description=README, + url='http://packages.python.org/cssselect2/', + license='BSD', + packages=['cssselect2'], + package_data={'cssselect2': ['tests/*']}, + install_requires=['tinycss2'], + setup_requires=pytest_runner, + test_suite='cssselect2.tests', + tests_require=[ + 'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort'], +
extras_require={'test': [ + 'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort']}, +)