From 3cd4c95272e874b656f27dcaaf15b2870829a7c9 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 28 Sep 2019 09:15:17 +0200 Subject: [PATCH] Add typed nodes for XSD decoded values processing - Added TypedAttribute and TypedElement namedtuples --- elementpath/schema_proxy.py | 7 ++++-- elementpath/xpath1_parser.py | 24 +++++++++----------- elementpath/xpath_context.py | 17 ++++++++++---- elementpath/xpath_nodes.py | 44 +++++++++++++++++++++--------------- tests/test_xpath1_parser.py | 8 +++---- 5 files changed, 58 insertions(+), 42 deletions(-) diff --git a/elementpath/schema_proxy.py b/elementpath/schema_proxy.py index c38861f..96ea4a9 100644 --- a/elementpath/schema_proxy.py +++ b/elementpath/schema_proxy.py @@ -297,8 +297,11 @@ class XMLSchemaProxy(AbstractSchemaProxy): def get_primitive_type(self, xsd_type): if not xsd_type.is_simple(): - return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE] - elif not hasattr(xsd_type, 'primitive_type'): + if not xsd_type.has_simple_content(): + return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE] + xsd_type = xsd_type.content_type + + if not hasattr(xsd_type, 'primitive_type'): if xsd_type.base_type is None: return xsd_type return self.get_primitive_type(xsd_type.base_type) diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py index b815d28..27deed7 100644 --- a/elementpath/xpath1_parser.py +++ b/elementpath/xpath1_parser.py @@ -21,8 +21,8 @@ from .tdop_parser import Parser, MultiLabel from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \ XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed from .xpath_token import XPathToken -from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \ - is_xpath_node, is_element_node, is_document_node, is_attribute_node, \ +from .xpath_nodes import AttributeNode, NamespaceNode, TypedAttribute, TypedElement,\ + is_etree_element, is_xpath_node, is_element_node, is_document_node, is_attribute_node, \ is_text_node, 
is_comment_node, is_processing_instruction_node, node_name XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF" @@ -238,7 +238,7 @@ literal('(integer)') @method(literal('(name)', bp=10)) def evaluate(self, context=None): - return [x for x in self.select(context)] or None + return [x for x in self.select(context)] @method('(name)') @@ -257,11 +257,12 @@ def select(self, context=None): for item in context.iter_children_or_self(): xsd_type = self.match_xsd_type(item, name) if xsd_type is not None: - if isinstance(context.item, AttributeNode): - primitive_type = self.parser.schema.get_primitive_type(xsd_type) - yield XSD_BUILTIN_TYPES[primitive_type.local_name].value + primitive_type = self.parser.schema.get_primitive_type(xsd_type) + value = XSD_BUILTIN_TYPES[primitive_type.local_name].value + if isinstance(item, AttributeNode): + yield TypedAttribute(item, value) else: - yield context.item + yield TypedElement(item, value) elif self.xsd_type is None: # Untyped selection @@ -275,13 +276,10 @@ def select(self, context=None): for item in context.iter_children_or_self(): try: if is_attribute_node(item, name): - yield AttributeNode(item[0], self.xsd_type.decode(item[1])) + yield TypedAttribute(item, self.xsd_type.decode(item[1])) elif is_element_node(item, tag): - if self.xsd_type.is_simple(): - yield self.xsd_type.decode(item.text) - elif self.xsd_type.has_simple_content(): - self.xsd_type.decode(item.text) - yield item + if self.xsd_type.is_simple() or self.xsd_type.has_simple_content(): + yield TypedElement(item, self.xsd_type.decode(item.text)) else: yield item except (TypeError, ValueError): diff --git a/elementpath/xpath_context.py b/elementpath/xpath_context.py index 165c821..0ea1612 100644 --- a/elementpath/xpath_context.py +++ b/elementpath/xpath_context.py @@ -11,8 +11,8 @@ import datetime from .exceptions import ElementPathTypeError -from .xpath_nodes import AttributeNode, is_etree_element, is_element_node, \ - 
is_document_node, is_attribute_node +from .xpath_nodes import AttributeNode, TypedAttribute, TypedElement, is_etree_element, \ + is_element_node, is_document_node, is_attribute_node class XPathContext(object): @@ -107,6 +107,9 @@ class XPathContext(object): status = self.item, self.size, self.position, self.axis self.axis = 'attribute' + if isinstance(self.item, TypedElement): + self.item = self.item.elem + for item in self.item.attrib.items(): self.item = AttributeNode(*item) yield self.item @@ -129,7 +132,10 @@ class XPathContext(object): self.item = self._root.getroot() if is_document_node(self._root) else self._root yield self.item elif is_element_node(self.item): - elem = self.item + if isinstance(self.item, TypedElement): + elem = self.item.elem + else: + elem = self.item if elem.text is not None: self.item = elem.text yield self.item @@ -258,8 +264,9 @@ class XPathContext(object): yield item elif isinstance(item, AttributeNode): # Match XSD decoded attributes - for attr in filter(lambda x: isinstance(x, AttributeNode) and x[0] == item[0], results): - yield attr[1] if is_root else attr + for attr in filter(lambda x: isinstance(x, TypedAttribute), results): + if attr[0] in results: + yield attr[1] if is_root else attr self.item, self.size, self.position = status diff --git a/elementpath/xpath_nodes.py b/elementpath/xpath_nodes.py index 85f15c4..a588e2c 100644 --- a/elementpath/xpath_nodes.py +++ b/elementpath/xpath_nodes.py @@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types. 
""" from collections import namedtuple -from .compat import PY3, urlparse, unicode_type +from .compat import PY3, urlparse from .namespaces import XML_BASE, XSI_NIL from .exceptions import ElementPathValueError from .datatypes import ncname_validator @@ -23,12 +23,15 @@ from .datatypes import ncname_validator AttributeNode = namedtuple('Attribute', 'name value') """A namedtuple-based type to represent XPath attributes.""" -ElementNode = namedtuple('Element', 'tag text attrib') -"""A namedtuple-based type to represent XPath element simple and simple-content nodes.""" - NamespaceNode = namedtuple('Namespace', 'prefix uri') """A namedtuple-based type to represent XPath namespaces.""" +TypedAttribute = namedtuple('TypedAttribute', 'attr value') +"""A wrapper for processing typed-value attributes.""" + +TypedElement = namedtuple('TypedElement', 'elem value') +"""A wrapper for processing typed-value elements.""" + ### # Utility functions for ElementTree's Element instances @@ -37,15 +40,14 @@ def is_etree_element(obj): def elem_iter_strings(elem): - if isinstance(elem, ElementNode): - if elem.text is not None: - yield unicode_type(elem.text) - else: - for e in elem.iter(): - if e.text is not None: - yield e.text - if e.tail is not None and e is not elem: - yield e.tail + if isinstance(elem, TypedElement): + elem = elem.elem + + for e in elem.iter(): + if e.text is not None: + yield e.text + if e.tail is not None and e is not elem: + yield e.tail ### @@ -67,9 +69,12 @@ def is_element_node(obj, tag=None): :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \ are '*', '*:*', '*:local-name' and '{namespace}*'. 
""" - if not is_etree_element(obj) or callable(obj.tag): + if isinstance(obj, TypedElement): + obj = obj.elem + elif not is_etree_element(obj) or callable(obj.tag): return False - elif tag is None: + + if not tag: return True elif not obj.tag: return obj.tag == tag @@ -100,10 +105,13 @@ def is_attribute_node(obj, name=None): are '*', '*:*', '*:local-name' and '{namespace}*'. """ if name is None or name == '*' or name == '*:*': - return isinstance(obj, AttributeNode) - elif not isinstance(obj, AttributeNode): + return isinstance(obj, (AttributeNode, TypedAttribute)) + elif not isinstance(obj, (AttributeNode, TypedAttribute)): return False - elif name[0] == '*': + elif isinstance(obj, TypedAttribute): + obj = obj.attr + + if name[0] == '*': try: _, _name = name.split(':') except (ValueError, IndexError): diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py index ab39cba..8df9f5e 100644 --- a/tests/test_xpath1_parser.py +++ b/tests/test_xpath1_parser.py @@ -397,7 +397,10 @@ class XPath1ParserTest(unittest.TestCase): self.wrong_type("contains('XPath', 'XP', 20)") self.wrong_type("boolean(1, 5)") - # Features tests + # XPath expression tests + def test_node_selection(self): + self.check_value("mars", []) + def test_references(self): namespaces = {'tst': "http://xpath.test/ns"} root = self.etree.XML(""" @@ -811,9 +814,6 @@ class XPath1ParserTest(unittest.TestCase): self.check_value("1 and 1", True) self.check_value("1 and 'jupiter'", True) self.check_value("0 and 'mars'", False) - - self.check_value("mars") - self.check_value("1 and mars", False) def test_comparison_operators(self):