Add typed nodes for XSD decoded values processing

- Added TypedAttribute and TypedElement namedtuples
2019-09-28 09:15:17 +02:00 · 2019-09-28 09:15:17 +02:00 · 3cd4c95272
parent c367bdfd95
commit 3cd4c95272
5 changed files with 58 additions and 42 deletions
--- a/elementpath/schema_proxy.py
+++ b/elementpath/schema_proxy.py
@@ -297,8 +297,11 @@ class XMLSchemaProxy(AbstractSchemaProxy):

    def get_primitive_type(self, xsd_type):
        if not xsd_type.is_simple():
-            return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE]
-        elif not hasattr(xsd_type, 'primitive_type'):
+            if not xsd_type.has_simple_content():
+                return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE]
+            xsd_type = xsd_type.content_type
+
+        if not hasattr(xsd_type, 'primitive_type'):
            if xsd_type.base_type is None:
                return xsd_type
            return self.get_primitive_type(xsd_type.base_type)
--- a/elementpath/xpath1_parser.py
+++ b/elementpath/xpath1_parser.py
@@ -21,8 +21,8 @@ from .tdop_parser import Parser, MultiLabel
 from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \
    XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed
 from .xpath_token import XPathToken
-from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \
-    is_xpath_node, is_element_node, is_document_node, is_attribute_node, \
+from .xpath_nodes import AttributeNode, NamespaceNode, TypedAttribute, TypedElement,\
+    is_etree_element, is_xpath_node, is_element_node, is_document_node, is_attribute_node, \
    is_text_node, is_comment_node, is_processing_instruction_node, node_name

 XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
@@ -238,7 +238,7 @@ literal('(integer)')

@method(literal('(name)', bp=10))
 def evaluate(self, context=None):
-    return [x for x in self.select(context)] or None
+    return [x for x in self.select(context)]


@method('(name)')
@@ -257,11 +257,12 @@ def select(self, context=None):
        for item in context.iter_children_or_self():
            xsd_type = self.match_xsd_type(item, name)
            if xsd_type is not None:
-                if isinstance(context.item, AttributeNode):
-                    primitive_type = self.parser.schema.get_primitive_type(xsd_type)
-                    yield XSD_BUILTIN_TYPES[primitive_type.local_name].value
+                primitive_type = self.parser.schema.get_primitive_type(xsd_type)
+                value = XSD_BUILTIN_TYPES[primitive_type.local_name].value
+                if isinstance(item, AttributeNode):
+                    yield TypedAttribute(item, value)
                else:
-                    yield context.item
+                    yield TypedElement(item, value)

    elif self.xsd_type is None:
        # Untyped selection
@@ -275,13 +276,10 @@ def select(self, context=None):
        for item in context.iter_children_or_self():
            try:
                if is_attribute_node(item, name):
-                    yield AttributeNode(item[0], self.xsd_type.decode(item[1]))
+                    yield TypedAttribute(item, self.xsd_type.decode(item[1]))
                elif is_element_node(item, tag):
-                    if self.xsd_type.is_simple():
-                        yield self.xsd_type.decode(item.text)
-                    elif self.xsd_type.has_simple_content():
-                        self.xsd_type.decode(item.text)
-                        yield item
+                    if self.xsd_type.is_simple() or self.xsd_type.has_simple_content():
+                        yield TypedElement(item, self.xsd_type.decode(item.text))
                    else:
                        yield item
            except (TypeError, ValueError):
--- a/elementpath/xpath_context.py
+++ b/elementpath/xpath_context.py
@@ -11,8 +11,8 @@
 import datetime

 from .exceptions import ElementPathTypeError
-from .xpath_nodes import AttributeNode, is_etree_element, is_element_node, \
-    is_document_node, is_attribute_node
+from .xpath_nodes import AttributeNode, TypedAttribute, TypedElement, is_etree_element, \
+    is_element_node, is_document_node, is_attribute_node


 class XPathContext(object):
@@ -107,6 +107,9 @@ class XPathContext(object):
        status = self.item, self.size, self.position, self.axis
        self.axis = 'attribute'

+        if isinstance(self.item, TypedElement):
+            self.item = self.item.elem
+
        for item in self.item.attrib.items():
            self.item = AttributeNode(*item)
            yield self.item
@@ -129,7 +132,10 @@ class XPathContext(object):
            self.item = self._root.getroot() if is_document_node(self._root) else self._root
            yield self.item
        elif is_element_node(self.item):
-            elem = self.item
+            if isinstance(self.item, TypedElement):
+                elem = self.item.elem
+            else:
+                elem = self.item
            if elem.text is not None:
                self.item = elem.text
                yield self.item
@@ -258,8 +264,9 @@ class XPathContext(object):
                    yield item
            elif isinstance(item, AttributeNode):
                # Match XSD decoded attributes
-                for attr in filter(lambda x: isinstance(x, AttributeNode) and x[0] == item[0], results):
-                    yield attr[1] if is_root else attr
+                for attr in filter(lambda x: isinstance(x, TypedAttribute), results):
+                    if attr[0] in results:
+                        yield attr[1] if is_root else attr

        self.item, self.size, self.position = status

--- a/elementpath/xpath_nodes.py
+++ b/elementpath/xpath_nodes.py
@@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types.
 """
 from collections import namedtuple

-from .compat import PY3, urlparse, unicode_type
+from .compat import PY3, urlparse
 from .namespaces import XML_BASE, XSI_NIL
 from .exceptions import ElementPathValueError
 from .datatypes import ncname_validator
@@ -23,12 +23,15 @@ from .datatypes import ncname_validator
 AttributeNode = namedtuple('Attribute', 'name value')
 """A namedtuple-based type to represent XPath attributes."""

-ElementNode = namedtuple('Element', 'tag text attrib')
-"""A namedtuple-based type to represent XPath element simple and simple-content nodes."""
-
 NamespaceNode = namedtuple('Namespace', 'prefix uri')
 """A namedtuple-based type to represent XPath namespaces."""

+TypedAttribute = namedtuple('TypedAttribute', 'attr value')
+"""A wrapper for processing typed-value attributes."""
+
+TypedElement = namedtuple('TypedElement', 'elem value')
+"""A wrapper for processing typed-value elements."""
+

 ###
 # Utility functions for ElementTree's Element instances
@@ -37,15 +40,14 @@ def is_etree_element(obj):


 def elem_iter_strings(elem):
-    if isinstance(elem, ElementNode):
-        if elem.text is not None:
-            yield unicode_type(elem.text)
-    else:
-        for e in elem.iter():
-            if e.text is not None:
-                yield e.text
-            if e.tail is not None and e is not elem:
-                yield e.tail
+    if isinstance(elem, TypedElement):
+        elem = elem.elem
+
+    for e in elem.iter():
+        if e.text is not None:
+            yield e.text
+        if e.tail is not None and e is not elem:
+            yield e.tail


 ###
@@ -67,9 +69,12 @@ def is_element_node(obj, tag=None):
    :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
    are '*', '*:*', '*:local-name' and '{namespace}*'.
    """
-    if not is_etree_element(obj) or callable(obj.tag):
+    if isinstance(obj, TypedElement):
+        obj = obj.elem
+    elif not is_etree_element(obj) or callable(obj.tag):
        return False
-    elif tag is None:
+
+    if not tag:
        return True
    elif not obj.tag:
        return obj.tag == tag
@@ -100,10 +105,13 @@ def is_attribute_node(obj, name=None):
    are '*', '*:*', '*:local-name' and '{namespace}*'.
    """
    if name is None or name == '*' or name == '*:*':
-        return isinstance(obj, AttributeNode)
-    elif not isinstance(obj, AttributeNode):
+        return isinstance(obj, (AttributeNode, TypedAttribute))
+    elif not isinstance(obj, (AttributeNode, TypedAttribute)):
        return False
-    elif name[0] == '*':
+    elif isinstance(obj, TypedAttribute):
+        obj = obj.attr
+
+    if name[0] == '*':
        try:
            _, _name = name.split(':')
        except (ValueError, IndexError):
--- a/tests/test_xpath1_parser.py
+++ b/tests/test_xpath1_parser.py
@@ -397,7 +397,10 @@ class XPath1ParserTest(unittest.TestCase):
        self.wrong_type("contains('XPath', 'XP', 20)")
        self.wrong_type("boolean(1, 5)")

-    # Features tests
+    # XPath expression tests
+    def test_node_selection(self):
+        self.check_value("mars", [])
+
    def test_references(self):
        namespaces = {'tst': "http://xpath.test/ns"}
        root = self.etree.XML("""
@@ -811,9 +814,6 @@ class XPath1ParserTest(unittest.TestCase):
        self.check_value("1 and 1", True)
        self.check_value("1 and 'jupiter'", True)
        self.check_value("0 and 'mars'", False)
-
-        self.check_value("mars")
-
        self.check_value("1 and mars", False)

    def test_comparison_operators(self):