From 77afa93a15c2e1ee31eb47240c34fe99ba642515 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 26 Sep 2019 16:24:29 +0200 Subject: [PATCH] Add validate() to XSD type interface --- elementpath/schema_proxy.py | 12 +++++++- elementpath/xpath1_parser.py | 47 +++++++++++++++++++----------- elementpath/xpath2_constructors.py | 4 ++- elementpath/xpath2_parser.py | 15 +++++----- elementpath/xpath_nodes.py | 2 +- elementpath/xpath_token.py | 15 +++++++++- tests/test_package.py | 4 +-- tests/test_schema_proxy.py | 18 ++++++++++-- tests/test_xpath1_parser.py | 10 +++++++ 9 files changed, 94 insertions(+), 33 deletions(-) diff --git a/elementpath/schema_proxy.py b/elementpath/schema_proxy.py index 32a529b..c38861f 100644 --- a/elementpath/schema_proxy.py +++ b/elementpath/schema_proxy.py @@ -106,10 +106,20 @@ class AbstractXsdType(AbstractXsdComponent): `False` otherwise. """ + @abstractmethod + def validate(self, obj, *args, **kwargs): + """ + Validates an XML object node using the XSD type. The argument *obj* is an element + for complex type nodes or a text value for simple type nodes. Raises a `ValueError` + compatible exception (a `ValueError` or a subclass of it) if the argument is not valid. + """ + @abstractmethod def decode(self, obj, *args, **kwargs): """ - Decodes XML data using the XSD type. + Decodes an XML object node using the XSD type. The argument *obj* is an element + for complex type nodes or a text value for simple type nodes. Raises a `ValueError` + or a `TypeError` compatible exception if the argument it's not valid. """ diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py index 3f05cf8..1b427a0 100644 --- a/elementpath/xpath1_parser.py +++ b/elementpath/xpath1_parser.py @@ -242,9 +242,12 @@ def evaluate(self, context=None): return name = self.value if name[0] != '{' and self.parser.default_namespace: - name = u'{%s}%s' % (self.parser.default_namespace, name) + tag = u'{%s}%s' % (self.parser.default_namespace, name) + else: + tag = name if isinstance(context, XPathSchemaContext): + # Bind with the XSD type xsd_type = self.match_xsd_type(context.item, name) if xsd_type is not None: if isinstance(context.item, AttributeNode): @@ -254,21 +257,26 @@ def evaluate(self, context=None): return context.item elif self.xsd_type is None: + # Untyped evaluation if is_attribute_node(context.item, name): return context.item[1] - elif is_element_node(context.item, name): + elif is_element_node(context.item, tag): return context.item else: + # XSD typed evaluation try: if is_attribute_node(context.item, name): return self.xsd_type.decode(context.item[1]) - elif is_element_node(context.item, name): + elif is_element_node(context.item, tag): if self.xsd_type.is_simple(): - return self.xsd_type.decode(context.item) - else: - return context.item + return self.xsd_type.decode(context.item.text) + elif self.xsd_type.has_simple_content(): + self.xsd_type.decode(context.item.text) + return context.item + except (TypeError, ValueError): - self.wrong_context_type("Type %r is not appropriate for the context" % (type(context.item))) + msg = "Type {!r} is not appropriate for the context item {!r}" + self.wrong_context_type(msg.format(self.xsd_type, context.item)) @method('(name)') @@ -277,9 +285,12 @@ def select(self, context=None): return name = self.value if name[0] != '{' and self.parser.default_namespace: - name = u'{%s}%s' % (self.parser.default_namespace, name) + tag = u'{%s}%s' % (self.parser.default_namespace, name) + else: + tag = name if isinstance(context, XPathSchemaContext): + # Bind with the XSD type for item in context.iter_children_or_self(): xsd_type = self.match_xsd_type(item, name) if xsd_type is not None: @@ -294,20 +305,22 @@ def select(self, context=None): for item in context.iter_children_or_self(): if is_attribute_node(item, name): yield item[1] - elif is_element_node(item, name): + elif is_element_node(item, tag): yield item else: - # Typed selection + # XSD typed selection for item in context.iter_children_or_self(): try: if is_attribute_node(item, name): yield self.xsd_type.decode(item[1]) - elif is_element_node(item, name): + elif is_element_node(item, tag): if self.xsd_type.is_simple(): - self.xsd_type.validate(item.text) + yield self.xsd_type.decode(item.text) + elif self.xsd_type.has_simple_content(): + self.xsd_type.decode(item.text) + yield item else: - self.xsd_type.validate(item) - yield item + yield item except (TypeError, ValueError): msg = "Type {!r} does not match sequence type of {!r}" self.wrong_sequence_type(msg.format(self.xsd_type, item)) @@ -754,13 +767,13 @@ def led(self, left): @method('[') def select(self, context=None): if context is not None: - for position, _ in enumerate(self[0].select(context), start=1): + for position, item in enumerate(self[0].select(context), start=1): predicate = list(self[1].select(context.copy())) if len(predicate) == 1 and isinstance(predicate[0], NumericTypeProxy): if position == predicate[0]: - yield context.item + yield item elif self.boolean_value(predicate): - yield context.item + yield item ### diff --git a/elementpath/xpath2_constructors.py b/elementpath/xpath2_constructors.py index d9af685..38b7949 100644 --- a/elementpath/xpath2_constructors.py +++ b/elementpath/xpath2_constructors.py @@ -214,6 +214,8 @@ def cast(value): def cast(value, tz=None): if isinstance(value, Date10): return value + elif isinstance(value, UntypedAtomic): + return Date10.fromstring(str(value), tzinfo=tz) return Date10.fromstring(value, tzinfo=tz) @@ -419,7 +421,7 @@ def cast_to_boolean(value, context=None): elif isinstance(value, (int, float, decimal.Decimal)): return bool(value) elif isinstance(value, UntypedAtomic): - value = string_base_type(value) + value = unicode_type(value) elif not isinstance(value, string_base_type): raise xpath_error('FORG0006', 'the argument has an invalid type %r' % type(value)) diff --git a/elementpath/xpath2_parser.py b/elementpath/xpath2_parser.py index 3f80090..07e05d3 100644 --- a/elementpath/xpath2_parser.py +++ b/elementpath/xpath2_parser.py @@ -18,7 +18,7 @@ import decimal import math import operator -from .compat import MutableSequence, urlparse +from .compat import MutableSequence, urlparse, unicode_type from .exceptions import ElementPathError, ElementPathKeyError, \ ElementPathTypeError, MissingContextError from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, \ @@ -639,9 +639,10 @@ def evaluate(self, context=None): else: self.wrong_context_type("an atomic value is required") + input_value = self.data_value(result[0]) try: if namespace != XSD_NAMESPACE: - value = self.parser.schema.cast_as(result[0], atomic_type) + value = self.parser.schema.cast_as(input_value, atomic_type) else: local_name = atomic_type.split('}')[1] token_class = self.parser.symbol_table.get(local_name) @@ -649,13 +650,13 @@ def evaluate(self, context=None): self.unknown_atomic_type("atomic type %r not found in the in-scope schema types" % self[1].source) if local_name in {'base64Binary', 'hexBinary'}: - value = token_class.cast(result[0], self[0].label == 'literal') + value = token_class.cast(input_value, self[0].label == 'literal') elif local_name in {'dateTime', 'date', 'gDay', 'gMonth', 'gMonthDay', 'gYear', 'gYearMonth', 'time'}: - value = token_class.cast(result[0], tz=None if context is None else context.timezone) + value = token_class.cast(input_value, tz=None if context is None else context.timezone) elif local_name == 'QName': - value = token_class.cast(result[0], self.parser.namespaces) + value = token_class.cast(input_value, self.parser.namespaces) else: - value = token_class.cast(result[0]) + value = token_class.cast(input_value) except ElementPathError as err: if self.symbol != 'cast': @@ -668,7 +669,7 @@ def evaluate(self, context=None): except TypeError as err: if self.symbol != 'cast': return False - self.wrong_type(str(err)) + self.wrong_type(unicode_type(err)) except ValueError as err: if self.symbol != 'cast': return False diff --git a/elementpath/xpath_nodes.py b/elementpath/xpath_nodes.py index 330a2af..eb1f4ed 100644 --- a/elementpath/xpath_nodes.py +++ b/elementpath/xpath_nodes.py @@ -51,7 +51,7 @@ def elem_iter_strings(elem): # Element-like objects are used for representing elements and comments, ElementTree-like objects # for documents. Generic tuples are used for representing attributes and named-tuples for namespaces. ### -def is_element_node(obj, tag=None): +def is_element_node(obj, tag=None, default_namespace=None): """ Returns `True` if the first argument is an element node matching the tag, `False` otherwise. Raises a ValueError if the argument tag has to be used but it's in a wrong format. diff --git a/elementpath/xpath_token.py b/elementpath/xpath_token.py index 4b90443..f1bf231 100644 --- a/elementpath/xpath_token.py +++ b/elementpath/xpath_token.py @@ -434,7 +434,20 @@ class XPathToken(Token): elif not is_xpath_node(obj): return obj elif hasattr(obj, 'type'): - return self.schema_node_value(obj) + return self.schema_node_value(obj) # Schema context + elif self.xsd_type is None: + return UntypedAtomic(self.string_value(obj)) + + # XSD type bound data + try: + if is_attribute_node(obj): + return self.xsd_type.decode(obj[1]) + elif is_element_node(obj): + return self.xsd_type.decode(obj.text) + except TypeError as err: + self.wrong_type(str(err)) + except ValueError as err: + self.wrong_value(str(err)) else: return UntypedAtomic(self.string_value(obj)) diff --git a/tests/test_package.py b/tests/test_package.py index f566880..b51e810 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -27,7 +27,7 @@ class PackageTest(unittest.TestCase): cls.get_version = re.compile(r"(?:\bversion|__version__)(?:\s*=\s*)(\'[^\']*\'|\"[^\"]*\")") def test_missing_debug_statements(self): - message = "\nFound a debug missing statement at line %d or file %r: %r" + message = "\nFound a debug missing statement at line %d of file %r: %r" filename = None for line in fileinput.input(glob.glob(os.path.join(self.source_dir, '*.py'))): if fileinput.isfirstline(): @@ -38,7 +38,7 @@ class PackageTest(unittest.TestCase): self.assertIsNone(match, message % (lineno, filename, match.group(0) if match else None)) def test_version_matching(self): - message = "\nFound a different version at line %d or file %r: %r (maybe %r)." + message = "\nFound a different version at line %d of file %r: %r (maybe %r)." files = [ os.path.join(self.source_dir, '__init__.py'), os.path.join(self.package_dir, 'setup.py'), diff --git a/tests/test_schema_proxy.py b/tests/test_schema_proxy.py index 1f80230..bcd1d07 100644 --- a/tests/test_schema_proxy.py +++ b/tests/test_schema_proxy.py @@ -244,14 +244,26 @@ class XPath2ParserXMLSchemaTest(test_xpath2_parser.XPath2ParserTest): self.assertEqual(token[0][1].xsd_type, schema.types['rangeType']) self.assertEqual(token[1][0].xsd_type, schema.maps.types['{%s}integer' % XSD_NAMESPACE]) - context = XPathContext( - root=self.etree.XML('')) token = parser.parse("//b/@min lt //b/@max") self.assertEqual(token[0][0][0].xsd_type, schema.types['rangeType']) self.assertEqual(token[0][1][0].xsd_type, schema.maps.types['{%s}integer' % XSD_NAMESPACE]) self.assertEqual(token[1][0][0].xsd_type, schema.types['rangeType']) self.assertEqual(token[1][1][0].xsd_type, schema.maps.types['{%s}integer' % XSD_NAMESPACE]) - self.assertIsNone(token.evaluate(context)) + + root = self.etree.XML('') + with self.assertRaises(TypeError): + token.evaluate(context=XPathContext(root)) + + root = self.etree.XML('30') + self.assertIsNone(token.evaluate(context=XPathContext(root))) + + root = self.etree.XML('30') + context = XPathContext(root) + self.assertTrue(token.evaluate(context)) + + root = self.etree.XML('30') + context = XPathContext(root) + self.assertFalse(token.evaluate(context)) def test_instance_of_expression(self): element = self.etree.Element('schema') diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py index 422f560..2121d04 100644 --- a/tests/test_xpath1_parser.py +++ b/tests/test_xpath1_parser.py @@ -911,6 +911,16 @@ class XPath1ParserTest(unittest.TestCase): self.check_selector("count(B)", root, 3) self.check_selector("count(.//C)", root, 5) + root = self.etree.XML('5') + self.check_selector("count(@avg)", root, 0) + self.check_selector("count(@max)", root, 1) + self.check_selector("count(@min)", root, 1) + self.check_selector("count(@min | @max)", root, 2) + self.check_selector("count(@min | @avg)", root, 1) + self.check_selector("count(@top | @avg)", root, 0) + self.check_selector("count(@min | @max) = 1", root, False) + self.check_selector("count(@min | @max) = 2", root, True) + def test_sum_function(self): root = self.etree.XML(XML_DATA_TEST) self.check_value("sum($values)", 35)