Improve node sequence operators

- Add is_root class attribute to XPathToken
- '(name)' token yields AttributeNode objects for non-root tokens
2019-09-27 18:16:21 +02:00 · 2019-09-27 18:16:21 +02:00 · c367bdfd95
parent 93c6521a23
commit c367bdfd95
7 changed files with 67 additions and 98 deletions
--- a/elementpath/xpath1_parser.py
+++ b/elementpath/xpath1_parser.py
@ -198,6 +198,7 @@ class XPath1Parser(Parser):

    def parse(self, source):
        root_token = super(XPath1Parser, self).parse(source)
+        root_token.is_root = True
        try:
            root_token.evaluate()  # Static context evaluation
        except MissingContextError:
@ -233,56 +234,18 @@ literal('(string)')
 literal('(float)')
 literal('(decimal)')
 literal('(integer)')
-literal('(name)', bp=10)


-@method('(name)')
+@method(literal('(name)', bp=10))
 def evaluate(self, context=None):
-    if context is None:
-        return
-    name = self.value
-    if name[0] != '{' and self.parser.default_namespace:
-        tag = u'{%s}%s' % (self.parser.default_namespace, name)
-    else:
-        tag = name
-
-    if isinstance(context, XPathSchemaContext):
-        # Bind with the XSD type
-        xsd_type = self.match_xsd_type(context.item, name)
-        if xsd_type is not None:
-            if isinstance(context.item, AttributeNode):
-                primitive_type = self.parser.schema.get_primitive_type(xsd_type)
-                return XSD_BUILTIN_TYPES[primitive_type.local_name].value
-            else:
-                return context.item
-
-    elif self.xsd_type is None:
-        # Untyped evaluation
-        if is_attribute_node(context.item, name):
-            return context.item[1]
-        elif is_element_node(context.item, tag):
-            return context.item
-    else:
-        # XSD typed evaluation
-        try:
-            if is_attribute_node(context.item, name):
-                return self.xsd_type.decode(context.item[1])
-            elif is_element_node(context.item, tag):
-                if self.xsd_type.is_simple():
-                    return self.xsd_type.decode(context.item.text)
-                elif self.xsd_type.has_simple_content():
-                    self.xsd_type.decode(context.item.text)
-                return context.item
-
-        except (TypeError, ValueError):
-            msg = "Type {!r} is not appropriate for the context item {!r}"
-            self.wrong_context_type(msg.format(self.xsd_type, context.item))
+    return [x for x in self.select(context)] or None


@method('(name)')
 def select(self, context=None):
    if context is None:
        return
+
    name = self.value
    if name[0] != '{' and self.parser.default_namespace:
        tag = u'{%s}%s' % (self.parser.default_namespace, name)
@ -304,7 +267,7 @@ def select(self, context=None):
        # Untyped selection
        for item in context.iter_children_or_self():
            if is_attribute_node(item, name):
-                yield item[1]
+                yield item
            elif is_element_node(item, tag):
                yield item
    else:
@ -312,7 +275,7 @@ def select(self, context=None):
        for item in context.iter_children_or_self():
            try:
                if is_attribute_node(item, name):
-                    yield self.xsd_type.decode(item[1])
+                    yield AttributeNode(item[0], self.xsd_type.decode(item[1]))
                elif is_element_node(item, tag):
                    if self.xsd_type.is_simple():
                        yield self.xsd_type.decode(item.text)
@ -652,15 +615,27 @@ def evaluate(self, context=None):

 ###
 # Union expressions
-@method(infix('|', bp=50))
+@method('|', bp=50)
+def led(self, left):
+    self.cut_and_sort = True
+    if left.symbol in {'|', 'union'}:
+        left.cut_and_sort = False
+    self[:] = left, self.parser.expression(rbp=50)
+    return self
+
+
+@method('|')
 def select(self, context=None):
-    if context is not None:
-        results = {item for k in range(2) for item in self[k].select(context.copy())}
-        for item in context.iter():
-            if item in results:
+    if context is None:
+        return
+    elif not self.cut_and_sort:
+        for k in range(2):
+            for item in self[k].select(context.copy()):
                yield item
-            elif is_attribute_node(item) and item[1] in results:
-                yield item[1]
+    else:
+        results = {item for k in range(2) for item in self[k].select(context.copy())}
+        for item in context.iter_results(results, self.is_root):
+            yield item


 ###
@ -705,8 +680,8 @@ def select(self, context=None):
        left_results = list(self[0].select(context))
        context.size = len(left_results)
        for context.position, context.item in enumerate(left_results):
-            if not is_element_node(context.item):
-                self.wrong_type("left operand must returns element nodes: {}".format(context.item))
+            if not is_xpath_node(context.item):
+                self.wrong_type("left operand must returns XPath nodes: {}".format(context.item))
            for result in self[1].select(context):
                if is_etree_element(result) or isinstance(result, tuple):
                    if result not in items:
@ -878,7 +853,7 @@ def select(self, context=None):

    for _ in context.iter_attributes():
        for result in self[0].select(context):
-            yield result
+            yield result[1] if self.is_root else result


@method(axis('namespace'))
--- a/elementpath/xpath2_constructors.py
+++ b/elementpath/xpath2_constructors.py
@ -400,9 +400,9 @@ def select(self, context=None):
            for result in self[0].select(context):
                yield result
    else:
-        attribute_name = self[0].evaluate(context) if self else None
+        name = self[0].evaluate(context) if self else None
        for item in context.iter_attributes():
-            if is_attribute_node(item, attribute_name):
+            if is_attribute_node(item, name):
                yield context.item[1]


--- a/elementpath/xpath2_functions.py
+++ b/elementpath/xpath2_functions.py
@ -646,7 +646,7 @@ def evaluate(self, context=None):

@method(function('string-join', nargs=2))
 def evaluate(self, context=None):
-    items = [self.string_value(s) if is_element_node(s) else s
+    items = [self.string_value(s) if is_element_node(s) or is_attribute_node(s) else s
             for s in self[0].select(context)]
    try:
        return self.get_argument(context, 1, cls=string_base_type).join(items)
--- a/elementpath/xpath2_parser.py
+++ b/elementpath/xpath2_parser.py
@ -339,6 +339,7 @@ class XPath2Parser(XPath1Parser):

    def parse(self, source):
        root_token = super(XPath1Parser, self).parse(source)
+        root_token.is_root = True

        if self.schema is None:
            try:
@ -381,34 +382,25 @@ register('?')
 register('(:')
 register(':)')

-
 ###
 # Node sequence composition
-@method(infix('union', bp=50))
-def select(self, context=None):
-    if context is not None:
-        results = {item for k in range(2) for item in self[k].select(context.copy())}
-        for item in context.iter():
-            if item in results:
-                yield item
+XPath2Parser.duplicate('|', 'union')


@method(infix('intersect', bp=55))
 def select(self, context=None):
    if context is not None:
        results = set(self[0].select(context.copy())) & set(self[1].select(context.copy()))
-        for item in context.iter():
-            if item in results:
-                yield item
+        for item in context.iter_results(results, self.is_root):
+            yield item


@method(infix('except', bp=55))
 def select(self, context=None):
    if context is not None:
        results = set(self[0].select(context.copy())) - set(self[1].select(context.copy()))
-        for item in context.iter():
-            if item in results:
-                yield item
+        for item in context.iter_results(results, self.is_root):
+            yield item


 ###
--- a/elementpath/xpath_nodes.py
+++ b/elementpath/xpath_nodes.py
@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types.
 """
 from collections import namedtuple

-from .compat import PY3, urlparse
+from .compat import PY3, urlparse, unicode_type
 from .namespaces import XML_BASE, XSI_NIL
 from .exceptions import ElementPathValueError
 from .datatypes import ncname_validator
@ -23,6 +23,9 @@ from .datatypes import ncname_validator
 AttributeNode = namedtuple('Attribute', 'name value')
 """A namedtuple-based type to represent XPath attributes."""

+ElementNode = namedtuple('Element', 'tag text attrib')
+"""A namedtuple-based type to represent XPath element simple and simple-content nodes."""
+
 NamespaceNode = namedtuple('Namespace', 'prefix uri')
 """A namedtuple-based type to represent XPath namespaces."""

@ -34,11 +37,15 @@ def is_etree_element(obj):


 def elem_iter_strings(elem):
-    for e in elem.iter():
-        if e.text is not None:
-            yield e.text
-        if e.tail is not None and e is not elem:
-            yield e.tail
+    if isinstance(elem, ElementNode):
+        if elem.text is not None:
+            yield unicode_type(elem.text)
+    else:
+        for e in elem.iter():
+            if e.text is not None:
+                yield e.text
+            if e.tail is not None and e is not elem:
+                yield e.tail


 ###
@ -51,7 +58,7 @@ def elem_iter_strings(elem):
 # Element-like objects are used for representing elements and comments, ElementTree-like objects
 # for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
 ###
-def is_element_node(obj, tag=None, default_namespace=None):
+def is_element_node(obj, tag=None):
    """
    Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
    Raises a ValueError if the argument tag has to be used but it's in a wrong format.
--- a/elementpath/xpath_token.py
+++ b/elementpath/xpath_token.py
@ -24,7 +24,7 @@ import locale
 import contextlib
 from decimal import Decimal

-from .compat import string_base_type
+from .compat import string_base_type, unicode_type
 from .exceptions import xpath_error
 from .namespaces import XQT_ERRORS_NAMESPACE
 from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \
@ -53,8 +53,9 @@ def ordinal(n):
 class XPathToken(Token):
    """Base class for XPath tokens."""

-    comment = None   # for XPath 2.0+ comments
-    xsd_type = None  # fox XPath 2.0+ schema types labeling
+    is_root = False   # Flag that is set to True for root token instances
+    comment = None    # for XPath 2.0+ comments
+    xsd_type = None   # fox XPath 2.0+ schema types labeling

    def evaluate(self, context=None):
        """
@ -423,33 +424,22 @@ class XPathToken(Token):
            locale.setlocale(locale.LC_ALL, default_locale)

    ###
-    # XPath data conversion base functions
+    # XPath data accessors base functions
    def data_value(self, obj):
        """
        The typed value, as computed by fn:data() on each item. Returns an instance of
        UntypedAtomic.
        """
+        if is_attribute_node(obj):
+            obj = obj[1]
+
        if obj is None:
            return
        elif not is_xpath_node(obj):
            return obj
        elif hasattr(obj, 'type'):
            return self.schema_node_value(obj)  # Schema context
-        elif self.xsd_type is None:
-            return UntypedAtomic(self.string_value(obj))
-
-        # XSD type bound data
-        try:
-            if is_attribute_node(obj):
-                return self.xsd_type.decode(obj[1])
-            elif is_element_node(obj):
-                return self.xsd_type.decode(obj.text)
-        except TypeError as err:
-            self.wrong_type(str(err))
-        except ValueError as err:
-            self.wrong_value(str(err))
-        else:
-            return UntypedAtomic(self.string_value(obj))
+        return UntypedAtomic(self.string_value(obj))

    def boolean_value(self, obj):
        """
@ -481,7 +471,7 @@ class XPathToken(Token):
        elif is_element_node(obj):
            return ''.join(elem_iter_strings(obj))
        elif is_attribute_node(obj):
-            return obj[1]
+            return unicode_type(obj[1])
        elif is_text_node(obj):
            return obj
        elif is_document_node(obj):
--- a/tests/test_xpath1_parser.py
+++ b/tests/test_xpath1_parser.py
@ -811,6 +811,9 @@ class XPath1ParserTest(unittest.TestCase):
        self.check_value("1 and 1", True)
        self.check_value("1 and 'jupiter'", True)
        self.check_value("0 and 'mars'", False)
+
+        self.check_value("mars")
+
        self.check_value("1 and mars", False)

    def test_comparison_operators(self):
@ -1160,9 +1163,11 @@ class XPath1ParserTest(unittest.TestCase):
        self.check_value("a[preceding::a[not(b)]]", [], context=XPathContext(root, item=root[1]))

    def test_union(self):
-        root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2><B3/></A>')
+        root = self.etree.XML('<A min="1" max="10"><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2><B3/></A>')
        self.check_selector('/A/B2 | /A/B1', root, root[:2])
        self.check_selector('/A/B2 | /A/*', root, root[:])
+        self.check_selector('/A/B2 | /A/* | /A/B1', root, root[:])
+        self.check_selector('/A/@min | /A/@max', root, {'1', '10'})

    def test_default_namespace(self):
        root = self.etree.XML('<foo>bar</foo>')