From 77afa93a15c2e1ee31eb47240c34fe99ba642515 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 26 Sep 2019 16:24:29 +0200
Subject: [PATCH] Add validate() to XSD type interface

---
 elementpath/schema_proxy.py        | 12 +++++++-
 elementpath/xpath1_parser.py       | 47 +++++++++++++++++++-----------
 elementpath/xpath2_constructors.py |  4 ++-
 elementpath/xpath2_parser.py       | 15 +++++-----
 elementpath/xpath_nodes.py         |  2 +-
 elementpath/xpath_token.py         | 15 +++++++++-
 tests/test_package.py              |  4 +--
 tests/test_schema_proxy.py         | 18 ++++++++++--
 tests/test_xpath1_parser.py        | 10 +++++++
 9 files changed, 94 insertions(+), 33 deletions(-)

diff --git a/elementpath/schema_proxy.py b/elementpath/schema_proxy.py
index 32a529b..c38861f 100644
--- a/elementpath/schema_proxy.py
+++ b/elementpath/schema_proxy.py
@@ -106,10 +106,20 @@ class AbstractXsdType(AbstractXsdComponent):
         `False` otherwise.
         """
 
+    @abstractmethod
+    def validate(self, obj, *args, **kwargs):
+        """
+        Validates an XML object node using the XSD type. The argument *obj* is an element
+        for complex type nodes or a text value for simple type nodes. Raises a `ValueError`
+        compatible exception (a `ValueError` or a subclass of it) if the argument is not valid.
+        """
+
     @abstractmethod
     def decode(self, obj, *args, **kwargs):
         """
-        Decodes XML data using the XSD type.
+        Decodes an XML object node using the XSD type. The argument *obj* is an element
+        for complex type nodes or a text value for simple type nodes. Raises a `ValueError`
+        or a `TypeError` compatible exception if the argument is not valid.
         """
 
 
diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py
index 3f05cf8..1b427a0 100644
--- a/elementpath/xpath1_parser.py
+++ b/elementpath/xpath1_parser.py
@@ -242,9 +242,12 @@ def evaluate(self, context=None):
         return
     name = self.value
     if name[0] != '{' and self.parser.default_namespace:
-        name = u'{%s}%s' % (self.parser.default_namespace, name)
+        tag = u'{%s}%s' % (self.parser.default_namespace, name)
+    else:
+        tag = name
 
     if isinstance(context, XPathSchemaContext):
+        # Bind with the XSD type
         xsd_type = self.match_xsd_type(context.item, name)
         if xsd_type is not None:
             if isinstance(context.item, AttributeNode):
@@ -254,21 +257,26 @@ def evaluate(self, context=None):
                 return context.item
 
     elif self.xsd_type is None:
+        # Untyped evaluation
         if is_attribute_node(context.item, name):
             return context.item[1]
-        elif is_element_node(context.item, name):
+        elif is_element_node(context.item, tag):
             return context.item
     else:
+        # XSD typed evaluation
         try:
             if is_attribute_node(context.item, name):
                 return self.xsd_type.decode(context.item[1])
-            elif is_element_node(context.item, name):
+            elif is_element_node(context.item, tag):
                 if self.xsd_type.is_simple():
-                    return self.xsd_type.decode(context.item)
-                else:
-                    return context.item
+                    return self.xsd_type.decode(context.item.text)
+                elif self.xsd_type.has_simple_content():
+                    self.xsd_type.decode(context.item.text)
+                return context.item
+
         except (TypeError, ValueError):
-            self.wrong_context_type("Type %r is not appropriate for the context" % (type(context.item)))
+            msg = "Type {!r} is not appropriate for the context item {!r}"
+            self.wrong_context_type(msg.format(self.xsd_type, context.item))
 
 
 @method('(name)')
@@ -277,9 +285,12 @@ def select(self, context=None):
         return
     name = self.value
     if name[0] != '{' and self.parser.default_namespace:
-        name = u'{%s}%s' % (self.parser.default_namespace, name)
+        tag = u'{%s}%s' % (self.parser.default_namespace, name)
+    else:
+        tag = name
 
     if isinstance(context, XPathSchemaContext):
+        # Bind with the XSD type
         for item in context.iter_children_or_self():
             xsd_type = self.match_xsd_type(item, name)
             if xsd_type is not None:
@@ -294,20 +305,22 @@ def select(self, context=None):
         for item in context.iter_children_or_self():
             if is_attribute_node(item, name):
                 yield item[1]
-            elif is_element_node(item, name):
+            elif is_element_node(item, tag):
                 yield item
     else:
-        # Typed selection
+        # XSD typed selection
         for item in context.iter_children_or_self():
             try:
                 if is_attribute_node(item, name):
                     yield self.xsd_type.decode(item[1])
-                elif is_element_node(item, name):
+                elif is_element_node(item, tag):
                     if self.xsd_type.is_simple():
-                        self.xsd_type.validate(item.text)
+                        yield self.xsd_type.decode(item.text)
+                    elif self.xsd_type.has_simple_content():
+                        self.xsd_type.decode(item.text)
+                        yield item
                     else:
-                        self.xsd_type.validate(item)
-                    yield item
+                        yield item
             except (TypeError, ValueError):
                 msg = "Type {!r} does not match sequence type of {!r}"
                 self.wrong_sequence_type(msg.format(self.xsd_type, item))
@@ -754,13 +767,13 @@ def led(self, left):
 @method('[')
 def select(self, context=None):
     if context is not None:
-        for position, _ in enumerate(self[0].select(context), start=1):
+        for position, item in enumerate(self[0].select(context), start=1):
             predicate = list(self[1].select(context.copy()))
             if len(predicate) == 1 and isinstance(predicate[0], NumericTypeProxy):
                 if position == predicate[0]:
-                    yield context.item
+                    yield item
             elif self.boolean_value(predicate):
-                yield context.item
+                yield item
 
 
 ###
diff --git a/elementpath/xpath2_constructors.py b/elementpath/xpath2_constructors.py
index d9af685..38b7949 100644
--- a/elementpath/xpath2_constructors.py
+++ b/elementpath/xpath2_constructors.py
@@ -214,6 +214,8 @@ def cast(value):
 def cast(value, tz=None):
     if isinstance(value, Date10):
         return value
+    elif isinstance(value, UntypedAtomic):
+        return Date10.fromstring(str(value), tzinfo=tz)
     return Date10.fromstring(value, tzinfo=tz)
 
 
@@ -419,7 +421,7 @@ def cast_to_boolean(value, context=None):
     elif isinstance(value, (int, float, decimal.Decimal)):
         return bool(value)
     elif isinstance(value, UntypedAtomic):
-        value = string_base_type(value)
+        value = unicode_type(value)
     elif not isinstance(value, string_base_type):
         raise xpath_error('FORG0006', 'the argument has an invalid type %r' % type(value))
 
diff --git a/elementpath/xpath2_parser.py b/elementpath/xpath2_parser.py
index 3f80090..07e05d3 100644
--- a/elementpath/xpath2_parser.py
+++ b/elementpath/xpath2_parser.py
@@ -18,7 +18,7 @@ import decimal
 import math
 import operator
 
-from .compat import MutableSequence, urlparse
+from .compat import MutableSequence, urlparse, unicode_type
 from .exceptions import ElementPathError, ElementPathKeyError, \
     ElementPathTypeError, MissingContextError
 from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, \
@@ -639,9 +639,10 @@ def evaluate(self, context=None):
         else:
             self.wrong_context_type("an atomic value is required")
 
+    input_value = self.data_value(result[0])
     try:
         if namespace != XSD_NAMESPACE:
-            value = self.parser.schema.cast_as(result[0], atomic_type)
+            value = self.parser.schema.cast_as(input_value, atomic_type)
         else:
             local_name = atomic_type.split('}')[1]
             token_class = self.parser.symbol_table.get(local_name)
@@ -649,13 +650,13 @@ def evaluate(self, context=None):
                 self.unknown_atomic_type("atomic type %r not found in the in-scope schema types" % self[1].source)
 
             if local_name in {'base64Binary', 'hexBinary'}:
-                value = token_class.cast(result[0], self[0].label == 'literal')
+                value = token_class.cast(input_value, self[0].label == 'literal')
             elif local_name in {'dateTime', 'date', 'gDay', 'gMonth', 'gMonthDay', 'gYear', 'gYearMonth', 'time'}:
-                value = token_class.cast(result[0], tz=None if context is None else context.timezone)
+                value = token_class.cast(input_value, tz=None if context is None else context.timezone)
             elif local_name == 'QName':
-                value = token_class.cast(result[0], self.parser.namespaces)
+                value = token_class.cast(input_value, self.parser.namespaces)
             else:
-                value = token_class.cast(result[0])
+                value = token_class.cast(input_value)
 
     except ElementPathError as err:
         if self.symbol != 'cast':
@@ -668,7 +669,7 @@ def evaluate(self, context=None):
     except TypeError as err:
         if self.symbol != 'cast':
             return False
-        self.wrong_type(str(err))
+        self.wrong_type(unicode_type(err))
     except ValueError as err:
         if self.symbol != 'cast':
             return False
diff --git a/elementpath/xpath_nodes.py b/elementpath/xpath_nodes.py
index 330a2af..eb1f4ed 100644
--- a/elementpath/xpath_nodes.py
+++ b/elementpath/xpath_nodes.py
@@ -51,7 +51,7 @@ def elem_iter_strings(elem):
 # Element-like objects are used for representing elements and comments, ElementTree-like objects
 # for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
 ###
-def is_element_node(obj, tag=None):
+def is_element_node(obj, tag=None, default_namespace=None):
     """
     Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
     Raises a ValueError if the argument tag has to be used but it's in a wrong format.
diff --git a/elementpath/xpath_token.py b/elementpath/xpath_token.py
index 4b90443..f1bf231 100644
--- a/elementpath/xpath_token.py
+++ b/elementpath/xpath_token.py
@@ -434,7 +434,20 @@ class XPathToken(Token):
         elif not is_xpath_node(obj):
             return obj
         elif hasattr(obj, 'type'):
-            return self.schema_node_value(obj)
+            return self.schema_node_value(obj)  # Schema context
+        elif self.xsd_type is None:
+            return UntypedAtomic(self.string_value(obj))
+
+        # XSD type bound data
+        try:
+            if is_attribute_node(obj):
+                return self.xsd_type.decode(obj[1])
+            elif is_element_node(obj):
+                return self.xsd_type.decode(obj.text)
+        except TypeError as err:
+            self.wrong_type(str(err))
+        except ValueError as err:
+            self.wrong_value(str(err))
         else:
             return UntypedAtomic(self.string_value(obj))
 
diff --git a/tests/test_package.py b/tests/test_package.py
index f566880..b51e810 100644
--- a/tests/test_package.py
+++ b/tests/test_package.py
@@ -27,7 +27,7 @@ class PackageTest(unittest.TestCase):
         cls.get_version = re.compile(r"(?:\bversion|__version__)(?:\s*=\s*)(\'[^\']*\'|\"[^\"]*\")")
 
     def test_missing_debug_statements(self):
-        message = "\nFound a debug missing statement at line %d or file %r: %r"
+        message = "\nFound a debug missing statement at line %d of file %r: %r"
         filename = None
         for line in fileinput.input(glob.glob(os.path.join(self.source_dir, '*.py'))):
             if fileinput.isfirstline():
@@ -38,7 +38,7 @@ class PackageTest(unittest.TestCase):
             self.assertIsNone(match, message % (lineno, filename, match.group(0) if match else None))
 
     def test_version_matching(self):
-        message = "\nFound a different version at line %d or file %r: %r (maybe %r)."
+        message = "\nFound a different version at line %d of file %r: %r (maybe %r)."
         files = [
             os.path.join(self.source_dir, '__init__.py'),
             os.path.join(self.package_dir, 'setup.py'),
diff --git a/tests/test_schema_proxy.py b/tests/test_schema_proxy.py
index 1f80230..bcd1d07 100644
--- a/tests/test_schema_proxy.py
+++ b/tests/test_schema_proxy.py
@@ -244,14 +244,26 @@ class XPath2ParserXMLSchemaTest(test_xpath2_parser.XPath2ParserTest):
         self.assertEqual(token[0][1].xsd_type, schema.types['rangeType'])
         self.assertEqual(token[1][0].xsd_type, schema.maps.types['{%s}integer' % XSD_NAMESPACE])
 
-        context = XPathContext(
-            root=self.etree.XML('<values xmlns="http://xpath.test/ns"><b min="19"/></values>'))
         token = parser.parse("//b/@min lt //b/@max")
         self.assertEqual(token[0][0][0].xsd_type, schema.types['rangeType'])
         self.assertEqual(token[0][1][0].xsd_type, schema.maps.types['{%s}integer' % XSD_NAMESPACE])
         self.assertEqual(token[1][0][0].xsd_type, schema.types['rangeType'])
         self.assertEqual(token[1][1][0].xsd_type, schema.maps.types['{%s}integer' % XSD_NAMESPACE])
-        self.assertIsNone(token.evaluate(context))
+
+        root = self.etree.XML('<values xmlns="http://xpath.test/ns"><b min="19"/></values>')
+        with self.assertRaises(TypeError):
+            token.evaluate(context=XPathContext(root))
+
+        root = self.etree.XML('<values xmlns="http://xpath.test/ns"><b min="19">30</b></values>')
+        self.assertIsNone(token.evaluate(context=XPathContext(root)))
+
+        root = self.etree.XML('<values xmlns="http://xpath.test/ns"><b min="19" max="40">30</b></values>')
+        context = XPathContext(root)
+        self.assertTrue(token.evaluate(context))
+
+        root = self.etree.XML('<values xmlns="http://xpath.test/ns"><b min="19" max="10">30</b></values>')
+        context = XPathContext(root)
+        self.assertFalse(token.evaluate(context))
 
     def test_instance_of_expression(self):
         element = self.etree.Element('schema')
diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py
index 422f560..2121d04 100644
--- a/tests/test_xpath1_parser.py
+++ b/tests/test_xpath1_parser.py
@@ -911,6 +911,16 @@ class XPath1ParserTest(unittest.TestCase):
         self.check_selector("count(B)", root, 3)
         self.check_selector("count(.//C)", root, 5)
 
+        root = self.etree.XML('<value max="10" min="0">5</value>')
+        self.check_selector("count(@avg)", root, 0)
+        self.check_selector("count(@max)", root, 1)
+        self.check_selector("count(@min)", root, 1)
+        self.check_selector("count(@min | @max)", root, 2)
+        self.check_selector("count(@min | @avg)", root, 1)
+        self.check_selector("count(@top | @avg)", root, 0)
+        self.check_selector("count(@min | @max) = 1", root, False)
+        self.check_selector("count(@min | @max) = 2", root, True)
+
     def test_sum_function(self):
         root = self.etree.XML(XML_DATA_TEST)
         self.check_value("sum($values)", 35)