Add typed nodes for XSD decoded values processing

- Added TypedAttribute and TypedElement namedtuples
This commit is contained in:
Davide Brunato 2019-09-28 09:15:17 +02:00
parent c367bdfd95
commit 3cd4c95272
5 changed files with 58 additions and 42 deletions

View File

@ -297,8 +297,11 @@ class XMLSchemaProxy(AbstractSchemaProxy):
def get_primitive_type(self, xsd_type):
if not xsd_type.is_simple():
return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE]
elif not hasattr(xsd_type, 'primitive_type'):
if not xsd_type.has_simple_content():
return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE]
xsd_type = xsd_type.content_type
if not hasattr(xsd_type, 'primitive_type'):
if xsd_type.base_type is None:
return xsd_type
return self.get_primitive_type(xsd_type.base_type)

View File

@ -21,8 +21,8 @@ from .tdop_parser import Parser, MultiLabel
from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \
XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed
from .xpath_token import XPathToken
from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \
is_xpath_node, is_element_node, is_document_node, is_attribute_node, \
from .xpath_nodes import AttributeNode, NamespaceNode, TypedAttribute, TypedElement,\
is_etree_element, is_xpath_node, is_element_node, is_document_node, is_attribute_node, \
is_text_node, is_comment_node, is_processing_instruction_node, node_name
XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
@ -238,7 +238,7 @@ literal('(integer)')
@method(literal('(name)', bp=10))
def evaluate(self, context=None):
return [x for x in self.select(context)] or None
return [x for x in self.select(context)]
@method('(name)')
@ -257,11 +257,12 @@ def select(self, context=None):
for item in context.iter_children_or_self():
xsd_type = self.match_xsd_type(item, name)
if xsd_type is not None:
if isinstance(context.item, AttributeNode):
primitive_type = self.parser.schema.get_primitive_type(xsd_type)
yield XSD_BUILTIN_TYPES[primitive_type.local_name].value
primitive_type = self.parser.schema.get_primitive_type(xsd_type)
value = XSD_BUILTIN_TYPES[primitive_type.local_name].value
if isinstance(item, AttributeNode):
yield TypedAttribute(item, value)
else:
yield context.item
yield TypedElement(item, value)
elif self.xsd_type is None:
# Untyped selection
@ -275,13 +276,10 @@ def select(self, context=None):
for item in context.iter_children_or_self():
try:
if is_attribute_node(item, name):
yield AttributeNode(item[0], self.xsd_type.decode(item[1]))
yield TypedAttribute(item, self.xsd_type.decode(item[1]))
elif is_element_node(item, tag):
if self.xsd_type.is_simple():
yield self.xsd_type.decode(item.text)
elif self.xsd_type.has_simple_content():
self.xsd_type.decode(item.text)
yield item
if self.xsd_type.is_simple() or self.xsd_type.has_simple_content():
yield TypedElement(item, self.xsd_type.decode(item.text))
else:
yield item
except (TypeError, ValueError):

View File

@ -11,8 +11,8 @@
import datetime
from .exceptions import ElementPathTypeError
from .xpath_nodes import AttributeNode, is_etree_element, is_element_node, \
is_document_node, is_attribute_node
from .xpath_nodes import AttributeNode, TypedAttribute, TypedElement, is_etree_element, \
is_element_node, is_document_node, is_attribute_node
class XPathContext(object):
@ -107,6 +107,9 @@ class XPathContext(object):
status = self.item, self.size, self.position, self.axis
self.axis = 'attribute'
if isinstance(self.item, TypedElement):
self.item = self.item.elem
for item in self.item.attrib.items():
self.item = AttributeNode(*item)
yield self.item
@ -129,7 +132,10 @@ class XPathContext(object):
self.item = self._root.getroot() if is_document_node(self._root) else self._root
yield self.item
elif is_element_node(self.item):
elem = self.item
if isinstance(self.item, TypedElement):
elem = self.item.elem
else:
elem = self.item
if elem.text is not None:
self.item = elem.text
yield self.item
@ -258,8 +264,9 @@ class XPathContext(object):
yield item
elif isinstance(item, AttributeNode):
# Match XSD decoded attributes
for attr in filter(lambda x: isinstance(x, AttributeNode) and x[0] == item[0], results):
yield attr[1] if is_root else attr
for attr in filter(lambda x: isinstance(x, TypedAttribute), results):
if attr[0] in results:
yield attr[1] if is_root else attr
self.item, self.size, self.position = status

View File

@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types.
"""
from collections import namedtuple
from .compat import PY3, urlparse, unicode_type
from .compat import PY3, urlparse
from .namespaces import XML_BASE, XSI_NIL
from .exceptions import ElementPathValueError
from .datatypes import ncname_validator
@ -23,12 +23,15 @@ from .datatypes import ncname_validator
AttributeNode = namedtuple('Attribute', 'name value')
"""A namedtuple-based type to represent XPath attributes."""
ElementNode = namedtuple('Element', 'tag text attrib')
"""A namedtuple-based type to represent XPath element simple and simple-content nodes."""
NamespaceNode = namedtuple('Namespace', 'prefix uri')
"""A namedtuple-based type to represent XPath namespaces."""
TypedAttribute = namedtuple('TypedAttribute', 'attr value')
"""A wrapper for processing typed-value attributes."""
TypedElement = namedtuple('TypedElement', 'elem value')
"""A wrapper for processing typed-value elements."""
###
# Utility functions for ElementTree's Element instances
@ -37,15 +40,14 @@ def is_etree_element(obj):
def elem_iter_strings(elem):
if isinstance(elem, ElementNode):
if elem.text is not None:
yield unicode_type(elem.text)
else:
for e in elem.iter():
if e.text is not None:
yield e.text
if e.tail is not None and e is not elem:
yield e.tail
if isinstance(elem, TypedElement):
elem = elem.elem
for e in elem.iter():
if e.text is not None:
yield e.text
if e.tail is not None and e is not elem:
yield e.tail
###
@ -67,9 +69,12 @@ def is_element_node(obj, tag=None):
:param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
are '*', '*:*', '*:local-name' and '{namespace}*'.
"""
if not is_etree_element(obj) or callable(obj.tag):
if isinstance(obj, TypedElement):
obj = obj.elem
elif not is_etree_element(obj) or callable(obj.tag):
return False
elif tag is None:
if not tag:
return True
elif not obj.tag:
return obj.tag == tag
@ -100,10 +105,13 @@ def is_attribute_node(obj, name=None):
are '*', '*:*', '*:local-name' and '{namespace}*'.
"""
if name is None or name == '*' or name == '*:*':
return isinstance(obj, AttributeNode)
elif not isinstance(obj, AttributeNode):
return isinstance(obj, (AttributeNode, TypedAttribute))
elif not isinstance(obj, (AttributeNode, TypedAttribute)):
return False
elif name[0] == '*':
elif isinstance(obj, TypedAttribute):
obj = obj.attr
if name[0] == '*':
try:
_, _name = name.split(':')
except (ValueError, IndexError):

View File

@ -397,7 +397,10 @@ class XPath1ParserTest(unittest.TestCase):
self.wrong_type("contains('XPath', 'XP', 20)")
self.wrong_type("boolean(1, 5)")
# Features tests
# XPath expression tests
def test_node_selection(self):
self.check_value("mars", [])
def test_references(self):
namespaces = {'tst': "http://xpath.test/ns"}
root = self.etree.XML("""
@ -811,9 +814,6 @@ class XPath1ParserTest(unittest.TestCase):
self.check_value("1 and 1", True)
self.check_value("1 and 'jupiter'", True)
self.check_value("0 and 'mars'", False)
self.check_value("mars")
self.check_value("1 and mars", False)
def test_comparison_operators(self):