diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py index 1b427a0..b815d28 100644 --- a/elementpath/xpath1_parser.py +++ b/elementpath/xpath1_parser.py @@ -198,6 +198,7 @@ class XPath1Parser(Parser): def parse(self, source): root_token = super(XPath1Parser, self).parse(source) + root_token.is_root = True try: root_token.evaluate() # Static context evaluation except MissingContextError: @@ -233,56 +234,18 @@ literal('(string)') literal('(float)') literal('(decimal)') literal('(integer)') -literal('(name)', bp=10) -@method('(name)') +@method(literal('(name)', bp=10)) def evaluate(self, context=None): - if context is None: - return - name = self.value - if name[0] != '{' and self.parser.default_namespace: - tag = u'{%s}%s' % (self.parser.default_namespace, name) - else: - tag = name - - if isinstance(context, XPathSchemaContext): - # Bind with the XSD type - xsd_type = self.match_xsd_type(context.item, name) - if xsd_type is not None: - if isinstance(context.item, AttributeNode): - primitive_type = self.parser.schema.get_primitive_type(xsd_type) - return XSD_BUILTIN_TYPES[primitive_type.local_name].value - else: - return context.item - - elif self.xsd_type is None: - # Untyped evaluation - if is_attribute_node(context.item, name): - return context.item[1] - elif is_element_node(context.item, tag): - return context.item - else: - # XSD typed evaluation - try: - if is_attribute_node(context.item, name): - return self.xsd_type.decode(context.item[1]) - elif is_element_node(context.item, tag): - if self.xsd_type.is_simple(): - return self.xsd_type.decode(context.item.text) - elif self.xsd_type.has_simple_content(): - self.xsd_type.decode(context.item.text) - return context.item - - except (TypeError, ValueError): - msg = "Type {!r} is not appropriate for the context item {!r}" - self.wrong_context_type(msg.format(self.xsd_type, context.item)) + return [x for x in self.select(context)] or None @method('(name)') def select(self, context=None): if context is None: return + name = self.value if name[0] != '{' and self.parser.default_namespace: tag = u'{%s}%s' % (self.parser.default_namespace, name) @@ -304,7 +267,7 @@ def select(self, context=None): # Untyped selection for item in context.iter_children_or_self(): if is_attribute_node(item, name): - yield item[1] + yield item elif is_element_node(item, tag): yield item else: @@ -312,7 +275,7 @@ def select(self, context=None): for item in context.iter_children_or_self(): try: if is_attribute_node(item, name): - yield self.xsd_type.decode(item[1]) + yield AttributeNode(item[0], self.xsd_type.decode(item[1])) elif is_element_node(item, tag): if self.xsd_type.is_simple(): yield self.xsd_type.decode(item.text) @@ -652,15 +615,27 @@ def evaluate(self, context=None): ### # Union expressions -@method(infix('|', bp=50)) +@method('|', bp=50) +def led(self, left): + self.cut_and_sort = True + if left.symbol in {'|', 'union'}: + left.cut_and_sort = False + self[:] = left, self.parser.expression(rbp=50) + return self + + +@method('|') def select(self, context=None): - if context is not None: - results = {item for k in range(2) for item in self[k].select(context.copy())} - for item in context.iter(): - if item in results: + if context is None: + return + elif not self.cut_and_sort: + for k in range(2): + for item in self[k].select(context.copy()): yield item - elif is_attribute_node(item) and item[1] in results: - yield item[1] + else: + results = {item for k in range(2) for item in self[k].select(context.copy())} + for item in context.iter_results(results, self.is_root): + yield item ### @@ -705,8 +680,8 @@ def select(self, context=None): left_results = list(self[0].select(context)) context.size = len(left_results) for context.position, context.item in enumerate(left_results): - if not is_element_node(context.item): - self.wrong_type("left operand must returns element nodes: {}".format(context.item)) + if not is_xpath_node(context.item): + self.wrong_type("left operand must returns XPath nodes: {}".format(context.item)) for result in self[1].select(context): if is_etree_element(result) or isinstance(result, tuple): if result not in items: @@ -878,7 +853,7 @@ def select(self, context=None): for _ in context.iter_attributes(): for result in self[0].select(context): - yield result + yield result[1] if self.is_root else result @method(axis('namespace')) diff --git a/elementpath/xpath2_constructors.py b/elementpath/xpath2_constructors.py index 38b7949..f017e9c 100644 --- a/elementpath/xpath2_constructors.py +++ b/elementpath/xpath2_constructors.py @@ -400,9 +400,9 @@ def select(self, context=None): for result in self[0].select(context): yield result else: - attribute_name = self[0].evaluate(context) if self else None + name = self[0].evaluate(context) if self else None for item in context.iter_attributes(): - if is_attribute_node(item, attribute_name): + if is_attribute_node(item, name): yield context.item[1] diff --git a/elementpath/xpath2_functions.py b/elementpath/xpath2_functions.py index 3d12786..e80a801 100644 --- a/elementpath/xpath2_functions.py +++ b/elementpath/xpath2_functions.py @@ -646,7 +646,7 @@ def evaluate(self, context=None): @method(function('string-join', nargs=2)) def evaluate(self, context=None): - items = [self.string_value(s) if is_element_node(s) else s + items = [self.string_value(s) if is_element_node(s) or is_attribute_node(s) else s for s in self[0].select(context)] try: return self.get_argument(context, 1, cls=string_base_type).join(items) diff --git a/elementpath/xpath2_parser.py b/elementpath/xpath2_parser.py index 07e05d3..000071e 100644 --- a/elementpath/xpath2_parser.py +++ b/elementpath/xpath2_parser.py @@ -339,6 +339,7 @@ class XPath2Parser(XPath1Parser): def parse(self, source): root_token = super(XPath1Parser, self).parse(source) + root_token.is_root = True if self.schema is None: try: @@ -381,34 +382,25 @@ register('?') register('(:') register(':)') - ### # Node sequence composition -@method(infix('union', bp=50)) -def select(self, context=None): - if context is not None: - results = {item for k in range(2) for item in self[k].select(context.copy())} - for item in context.iter(): - if item in results: - yield item +XPath2Parser.duplicate('|', 'union') @method(infix('intersect', bp=55)) def select(self, context=None): if context is not None: results = set(self[0].select(context.copy())) & set(self[1].select(context.copy())) - for item in context.iter(): - if item in results: - yield item + for item in context.iter_results(results, self.is_root): + yield item @method(infix('except', bp=55)) def select(self, context=None): if context is not None: results = set(self[0].select(context.copy())) - set(self[1].select(context.copy())) - for item in context.iter(): - if item in results: - yield item + for item in context.iter_results(results, self.is_root): + yield item ### diff --git a/elementpath/xpath_nodes.py b/elementpath/xpath_nodes.py index eb1f4ed..85f15c4 100644 --- a/elementpath/xpath_nodes.py +++ b/elementpath/xpath_nodes.py @@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types. """ from collections import namedtuple -from .compat import PY3, urlparse +from .compat import PY3, urlparse, unicode_type from .namespaces import XML_BASE, XSI_NIL from .exceptions import ElementPathValueError from .datatypes import ncname_validator @@ -23,6 +23,9 @@ from .datatypes import ncname_validator AttributeNode = namedtuple('Attribute', 'name value') """A namedtuple-based type to represent XPath attributes.""" +ElementNode = namedtuple('Element', 'tag text attrib') +"""A namedtuple-based type to represent XPath element simple and simple-content nodes.""" + NamespaceNode = namedtuple('Namespace', 'prefix uri') """A namedtuple-based type to represent XPath namespaces.""" @@ -34,11 +37,15 @@ def is_etree_element(obj): def elem_iter_strings(elem): - for e in elem.iter(): - if e.text is not None: - yield e.text - if e.tail is not None and e is not elem: - yield e.tail + if isinstance(elem, ElementNode): + if elem.text is not None: + yield unicode_type(elem.text) + else: + for e in elem.iter(): + if e.text is not None: + yield e.text + if e.tail is not None and e is not elem: + yield e.tail ### @@ -51,7 +58,7 @@ def elem_iter_strings(elem): # Element-like objects are used for representing elements and comments, ElementTree-like objects # for documents. Generic tuples are used for representing attributes and named-tuples for namespaces. ### -def is_element_node(obj, tag=None, default_namespace=None): +def is_element_node(obj, tag=None): """ Returns `True` if the first argument is an element node matching the tag, `False` otherwise. Raises a ValueError if the argument tag has to be used but it's in a wrong format. diff --git a/elementpath/xpath_token.py b/elementpath/xpath_token.py index f1bf231..07cf85e 100644 --- a/elementpath/xpath_token.py +++ b/elementpath/xpath_token.py @@ -24,7 +24,7 @@ import locale import contextlib from decimal import Decimal -from .compat import string_base_type +from .compat import string_base_type, unicode_type from .exceptions import xpath_error from .namespaces import XQT_ERRORS_NAMESPACE from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \ @@ -53,8 +53,9 @@ def ordinal(n): class XPathToken(Token): """Base class for XPath tokens.""" - comment = None # for XPath 2.0+ comments - xsd_type = None # fox XPath 2.0+ schema types labeling + is_root = False # Flag that is set to True for root token instances + comment = None # for XPath 2.0+ comments + xsd_type = None # fox XPath 2.0+ schema types labeling def evaluate(self, context=None): """ @@ -423,33 +424,22 @@ class XPathToken(Token): locale.setlocale(locale.LC_ALL, default_locale) ### - # XPath data conversion base functions + # XPath data accessors base functions def data_value(self, obj): """ The typed value, as computed by fn:data() on each item. Returns an instance of UntypedAtomic. """ + if is_attribute_node(obj): + obj = obj[1] + if obj is None: return elif not is_xpath_node(obj): return obj elif hasattr(obj, 'type'): return self.schema_node_value(obj) # Schema context - elif self.xsd_type is None: - return UntypedAtomic(self.string_value(obj)) - - # XSD type bound data - try: - if is_attribute_node(obj): - return self.xsd_type.decode(obj[1]) - elif is_element_node(obj): - return self.xsd_type.decode(obj.text) - except TypeError as err: - self.wrong_type(str(err)) - except ValueError as err: - self.wrong_value(str(err)) - else: - return UntypedAtomic(self.string_value(obj)) + return UntypedAtomic(self.string_value(obj)) def boolean_value(self, obj): """ @@ -481,7 +471,7 @@ class XPathToken(Token): elif is_element_node(obj): return ''.join(elem_iter_strings(obj)) elif is_attribute_node(obj): - return obj[1] + return unicode_type(obj[1]) elif is_text_node(obj): return obj elif is_document_node(obj): diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py index 2121d04..ab39cba 100644 --- a/tests/test_xpath1_parser.py +++ b/tests/test_xpath1_parser.py @@ -811,6 +811,9 @@ class XPath1ParserTest(unittest.TestCase): self.check_value("1 and 1", True) self.check_value("1 and 'jupiter'", True) self.check_value("0 and 'mars'", False) + + self.check_value("mars") + self.check_value("1 and mars", False) def test_comparison_operators(self): @@ -1160,9 +1163,11 @@ class XPath1ParserTest(unittest.TestCase): self.check_value("a[preceding::a[not(b)]]", [], context=XPathContext(root, item=root[1])) def test_union(self): - root = self.etree.XML('') + root = self.etree.XML('') self.check_selector('/A/B2 | /A/B1', root, root[:2]) self.check_selector('/A/B2 | /A/*', root, root[:]) + self.check_selector('/A/B2 | /A/* | /A/B1', root, root[:]) + self.check_selector('/A/@min | /A/@max', root, {'1', '10'}) def test_default_namespace(self): root = self.etree.XML('bar')