diff --git a/elementpath/namespaces.py b/elementpath/namespaces.py index 0233636..33bb981 100644 --- a/elementpath/namespaces.py +++ b/elementpath/namespaces.py @@ -47,12 +47,14 @@ XSI_NONS_SCHEMA_LOCATION = '{%s}schemaLocation' % XSI_NAMESPACE # XML Schema types XSD_NOTATION = '{%s}NOTATION' % XSD_NAMESPACE XSD_ANY_ATOMIC_TYPE = '{%s}anyAtomicType' % XSD_NAMESPACE -XSD_UNTYPED = '{%s}untyped' % XSD_NAMESPACE -XSD_UNTYPED_ATOMIC = '{%s}untypedAtomic' % XSD_NAMESPACE XSD_ID = '{%s}ID' % XSD_NAMESPACE XSD_IDREF = '{%s}IDREF' % XSD_NAMESPACE XSD_IDREFS = '{%s}IDREFS' % XSD_NAMESPACE +# XPath type labels defined in XSD namespace that are not XSD builtin types +XSD_UNTYPED = '{%s}untyped' % XSD_NAMESPACE +XSD_UNTYPED_ATOMIC = '{%s}untypedAtomic' % XSD_NAMESPACE + def get_namespace(name): try: diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py index 7dad23d..d103f2b 100644 --- a/elementpath/xpath1_parser.py +++ b/elementpath/xpath1_parser.py @@ -21,9 +21,9 @@ from .tdop_parser import Parser, MultiLabel from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \ XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed from .xpath_token import XPathToken -from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, is_xpath_node, \ - is_element_node, is_document_node, is_attribute_node, is_text_node, is_comment_node, \ - is_processing_instruction_node, node_name, node_string_value +from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \ + is_xpath_node, is_element_node, is_document_node, is_attribute_node, \ + is_text_node, is_comment_node, is_processing_instruction_node, node_name XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF" u"\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD") @@ -1200,7 +1200,7 @@ def evaluate(self, context=None): def evaluate(self, context=None): arg = self.get_argument(context, default_to_context=True) try: - return float(node_string_value(arg) if is_xpath_node(arg) else arg) + return float(self.string_value(arg) if is_xpath_node(arg) else arg) except (TypeError, ValueError): return float('nan') diff --git a/elementpath/xpath2_constructors.py b/elementpath/xpath2_constructors.py index 4ac23c5..d9af685 100644 --- a/elementpath/xpath2_constructors.py +++ b/elementpath/xpath2_constructors.py @@ -212,36 +212,50 @@ def cast(value): # Constructors for datetime XSD types @constructor('date') def cast(value, tz=None): + if isinstance(value, Date10): + return value return Date10.fromstring(value, tzinfo=tz) @constructor('gDay') def cast(value, tz=None): + if isinstance(value, XPathGregorianDay): + return value return XPathGregorianDay.fromstring(value, tzinfo=tz) @constructor('gMonth') def cast(value, tz=None): + if isinstance(value, XPathGregorianMonth): + return value return XPathGregorianMonth.fromstring(value, tzinfo=tz) @constructor('gMonthDay') def cast(value, tz=None): + if isinstance(value, XPathGregorianMonthDay): + return value return XPathGregorianMonthDay.fromstring(value, tzinfo=tz) @constructor('gYear') def cast(value, tz=None): + if isinstance(value, XPathGregorianYear): + return value return XPathGregorianYear.fromstring(value, tzinfo=tz) @constructor('gYearMonth') def cast(value, tz=None): + if isinstance(value, XPathGregorianYearMonth): + return value return XPathGregorianYearMonth.fromstring(value, tzinfo=tz) @constructor('time') def cast(value, tz=None): + if isinstance(value, Time): + return value return Time.fromstring(value, tzinfo=tz) diff --git a/elementpath/xpath2_functions.py b/elementpath/xpath2_functions.py index 42b2269..3d12786 100644 --- a/elementpath/xpath2_functions.py +++ b/elementpath/xpath2_functions.py @@ -24,8 +24,8 @@ from .compat import PY3, string_base_type, unicode_chr, urlparse, urljoin, urlli from .datatypes import QNAME_PATTERN, DateTime10, Date10, Time, Timezone, Duration, DayTimeDuration from .namespaces import prefixed_to_qname, get_namespace from .xpath_context import XPathSchemaContext -from .xpath_nodes import is_document_node, is_xpath_node, is_element_node, is_attribute_node, \ - node_name, node_string_value, node_nilled, node_base_uri, node_document_uri +from .xpath_nodes import is_document_node, is_xpath_node, is_element_node, \ + is_attribute_node, node_name, node_nilled, node_base_uri, node_document_uri from .xpath2_parser import XPath2Parser method = XPath2Parser.method @@ -304,7 +304,7 @@ def evaluate(self, context=None): return item try: - return abs(node_string_value(item) if is_xpath_node(item) else item) + return abs(self.string_value(item) if is_xpath_node(item) else item) except TypeError as err: self.wrong_type(str(err)) diff --git a/elementpath/xpath2_parser.py b/elementpath/xpath2_parser.py index 185fcc0..6911a9d 100644 --- a/elementpath/xpath2_parser.py +++ b/elementpath/xpath2_parser.py @@ -19,10 +19,12 @@ import math import operator from .compat import MutableSequence, urlparse -from .exceptions import ElementPathError, ElementPathTypeError, MissingContextError -from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, XPATH_2_DEFAULT_NAMESPACES, \ - XSD_NOTATION, XSD_ANY_ATOMIC_TYPE, get_namespace, qname_to_prefixed, prefixed_to_qname -from .datatypes import XSD_BUILTIN_TYPES +from .exceptions import ElementPathError, ElementPathKeyError, \ + ElementPathTypeError, MissingContextError +from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, \ + XPATH_2_DEFAULT_NAMESPACES, XSD_NOTATION, XSD_ANY_ATOMIC_TYPE, get_namespace, \ + qname_to_prefixed, prefixed_to_qname, XSD_UNTYPED_ATOMIC +from .datatypes import UntypedAtomic, XSD_BUILTIN_TYPES from .xpath_nodes import is_xpath_node from .tdop_parser import create_tokenizer from .xpath1_parser import XML_NCNAME_PATTERN, XPath1Parser @@ -324,10 +326,16 @@ class XPath2Parser(XPath1Parser): } def is_instance(self, obj, type_qname): - if self.schema is not None: + if type_qname == XSD_UNTYPED_ATOMIC: + return isinstance(obj, UntypedAtomic) + elif self.schema is not None: return self.schema.is_instance(obj, type_qname) + local_name = type_qname.split('}')[1] - return XSD_BUILTIN_TYPES[local_name].validator(obj) + try: + return XSD_BUILTIN_TYPES[local_name].validator(obj) + except KeyError: + raise ElementPathKeyError("unknown type %r" % type_qname) def parse(self, source): root_token = super(XPath1Parser, self).parse(source) diff --git a/elementpath/xpath_nodes.py b/elementpath/xpath_nodes.py index f159bb9..330a2af 100644 --- a/elementpath/xpath_nodes.py +++ b/elementpath/xpath_nodes.py @@ -15,7 +15,7 @@ from collections import namedtuple from .compat import PY3, urlparse from .namespaces import XML_BASE, XSI_NIL -from .exceptions import ElementPathValueError, xpath_error +from .exceptions import ElementPathValueError from .datatypes import ncname_validator ### @@ -111,6 +111,10 @@ def is_attribute_node(obj, name=None): return obj[0] == name +def is_schema_node(obj): + return hasattr(obj, 'name') and hasattr(obj, 'local_name') and hasattr(obj, 'type') + + def is_comment_node(obj): return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'Comment' @@ -136,7 +140,8 @@ else: def is_xpath_node(obj): - return isinstance(obj, tuple) or is_etree_element(obj) or is_document_node(obj) or is_text_node(obj) + return isinstance(obj, tuple) or is_etree_element(obj) or \ + is_document_node(obj) or is_text_node(obj) or is_schema_node(obj) ### @@ -217,21 +222,3 @@ def node_name(obj): return obj.tag elif is_attribute_node(obj) or is_namespace_node(obj): return obj[0] - - -def node_string_value(obj): - if is_element_node(obj): - return u''.join(elem_iter_strings(obj)) - elif is_attribute_node(obj): - return obj[1] - elif is_text_node(obj): - return obj - elif is_document_node(obj): - return u''.join(e.text for e in obj.getroot().iter() if e.text is not None) - elif is_namespace_node(obj): - return obj[1] - elif is_comment_node(obj): - return obj.text - elif is_processing_instruction_node(obj): - return obj.text - diff --git a/elementpath/xpath_token.py b/elementpath/xpath_token.py index aebb2b0..1caa038 100644 --- a/elementpath/xpath_token.py +++ b/elementpath/xpath_token.py @@ -26,8 +26,10 @@ from decimal import Decimal from .compat import string_base_type from .exceptions import xpath_error from .namespaces import XQT_ERRORS_NAMESPACE -from .xpath_nodes import AttributeNode, is_etree_element, \ - is_element_node, is_document_node, is_xpath_node, node_string_value +from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \ + elem_iter_strings, is_text_node, is_namespace_node, is_comment_node, \ + is_processing_instruction_node, is_element_node, is_document_node, \ + is_xpath_node, is_schema_node from .datatypes import UntypedAtomic, Timezone, DayTimeDuration, XSD_BUILTIN_TYPES from .tdop_parser import Token @@ -351,17 +353,35 @@ class XPathToken(Token): :param name: a not empty string. :returns: the matched XSD type or `None` if there isn't a match. """ + if name[0] != '{' and self.parser.default_namespace: + name = '{%s}%s' % (self.parser.default_namespace, name) + if isinstance(schema_item, AttributeNode): - if not schema_item[1].is_matching(name, self.parser.default_namespace): + if not schema_item[1].is_matching(name): return - xsd_type = schema_item[1].type + + try: + xsd_type = schema_item[1].type + except AttributeError: + try: + xsd_type = self.parser.schema.get_attribute(name).type + except AttributeError: + return + elif is_etree_element(schema_item): if hasattr(schema_item, 'is_matching'): if not schema_item.is_matching(name, self.parser.default_namespace): return elif schema_item.tag != name: return - xsd_type = schema_item.type + + try: + xsd_type = schema_item.type + except AttributeError: + try: + xsd_type = self.parser.schema.get_element(name).type + except AttributeError: + return else: return @@ -397,15 +417,10 @@ class XPathToken(Token): return elif not is_xpath_node(obj): return obj - elif not hasattr(obj, 'type'): - return UntypedAtomic(node_string_value(obj)) - elif obj.type.is_simple(): - # In case of schema element or attribute use a the sample value - # of the primitive type - primitive_type = self.parser.schema.get_primitive_type(obj.type) - return XSD_BUILTIN_TYPES[primitive_type.local_name].value - elif obj.type.local_name == 'anyType': - return XSD_BUILTIN_TYPES['anyType'].value + elif hasattr(obj, 'type'): + return self.schema_node_value(obj) + else: + return UntypedAtomic(self.string_value(obj)) def boolean_value(self, obj): """ @@ -428,28 +443,56 @@ class XPathToken(Token): raise self.error('FORG0006', "Effective boolean value is not defined for {}.".format(obj)) return bool(obj) - @staticmethod - def string_value(obj): + def string_value(self, obj): """ The string value, as computed by fn:string(). """ if obj is None: return '' - elif is_xpath_node(obj): - return node_string_value(obj) + elif is_element_node(obj): + return u''.join(elem_iter_strings(obj)) + elif is_attribute_node(obj): + return obj[1] + elif is_text_node(obj): + return obj + elif is_document_node(obj): + return u''.join(e.text for e in obj.getroot().iter() if e.text is not None) + elif is_namespace_node(obj): + return obj[1] + elif is_comment_node(obj): + return obj.text + elif is_processing_instruction_node(obj): + return obj.text + elif is_schema_node(obj): + return str(self.schema_node_value(obj)) else: return str(obj) - @staticmethod - def number_value(obj): + def number_value(self, obj): """ The numeric value, as computed by fn:number() on each item. Returns a float value. """ try: - return float(node_string_value(obj) if is_xpath_node(obj) else obj) + return float(self.string_value(obj) if is_xpath_node(obj) else obj) except (TypeError, ValueError): return float('nan') + def schema_node_value(self, obj): + """ + Returns a sample typed value for the XSD schema node, valid in the value space + of the node. Used for schema-based dynamic evaluation of XPath expressions. + """ + try: + if obj.type.is_simple(): + # In case of schema element or attribute use a the sample value + # of the primitive type + primitive_type = self.parser.schema.get_primitive_type(obj.type) + return XSD_BUILTIN_TYPES[primitive_type.local_name].value + elif obj.type.local_name == 'anyType': + return XSD_BUILTIN_TYPES['anyType'].value + except AttributeError: + raise self.wrong_type("the argument %r is not a node of an XSD schema" % obj) + ### # Error handling helpers def error(self, code, message=None): diff --git a/tests/test_helpers.py b/tests/test_helpers.py index b80a180..b75cc36 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -21,7 +21,7 @@ from elementpath.xpath_nodes import AttributeNode, NamespaceNode, is_etree_eleme is_element_node, is_attribute_node, is_comment_node, is_document_node, \ is_namespace_node, is_processing_instruction_node, is_text_node, node_attributes, \ node_base_uri, node_document_uri, node_children, node_is_id, node_is_idrefs, \ - node_nilled, node_kind, node_name, node_string_value + node_nilled, node_kind, node_name from elementpath.xpath_helpers import boolean_value from elementpath.xpath1_parser import XPath1Parser @@ -227,24 +227,6 @@ class NodeHelpersTest(unittest.TestCase): self.assertEqual(node_name(attr), 'a1') self.assertEqual(node_name(namespace), 'xs') - def test_node_string_value_function(self): - document = ElementTree.parse(io.StringIO(u'123456789')) - element = ElementTree.Element('schema') - attribute = AttributeNode('id', '0212349350') - namespace = NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema') - comment = ElementTree.Comment('nothing important') - pi = ElementTree.ProcessingInstruction('action', 'nothing to do') - text = u'betelgeuse' - self.assertEqual(node_string_value(document), '123456789') - self.assertEqual(node_string_value(element), '') - self.assertEqual(node_string_value(attribute), '0212349350') - self.assertEqual(node_string_value(namespace), 'http://www.w3.org/2001/XMLSchema') - self.assertEqual(node_string_value(comment), 'nothing important') - self.assertEqual(node_string_value(pi), 'action nothing to do') - self.assertEqual(node_string_value(text), 'betelgeuse') - self.assertIsNone(node_string_value(None)) - self.assertIsNone(node_string_value(10)) - class CompatibilityHelpersTest(unittest.TestCase): @@ -263,6 +245,5 @@ class CompatibilityHelpersTest(unittest.TestCase): self.assertTrue(boolean_value(1)) - if __name__ == '__main__': unittest.main() diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py index 9f7e4f9..7816700 100644 --- a/tests/test_xpath1_parser.py +++ b/tests/test_xpath1_parser.py @@ -221,6 +221,26 @@ class XPath1ParserTest(unittest.TestCase): token = self.parser.parse('true()') self.assertIsNone(token.data_value(None)) + def test_string_value_function(self): + token = self.parser.parse('true()') + + document = ElementTree.parse(io.StringIO(u'123456789')) + element = ElementTree.Element('schema') + attribute = AttributeNode('id', '0212349350') + namespace = NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema') + comment = ElementTree.Comment('nothing important') + pi = ElementTree.ProcessingInstruction('action', 'nothing to do') + text = u'betelgeuse' + self.assertEqual(token.string_value(document), '123456789') + self.assertEqual(token.string_value(element), '') + self.assertEqual(token.string_value(attribute), '0212349350') + self.assertEqual(token.string_value(namespace), 'http://www.w3.org/2001/XMLSchema') + self.assertEqual(token.string_value(comment), 'nothing important') + self.assertEqual(token.string_value(pi), 'action nothing to do') + self.assertEqual(token.string_value(text), 'betelgeuse') + self.assertEqual(token.string_value(None), '') + self.assertEqual(token.string_value(10), '10') + def test_number_value_function(self): token = self.parser.parse('true()') self.assertEqual(token.number_value("19"), 19) @@ -845,6 +865,7 @@ class XPath1ParserTest(unittest.TestCase): def test_number_function(self): root = self.etree.XML('15') + self.check_value("number()", MissingContextError) self.check_value("number()", 15, context=XPathContext(root)) self.check_value("number()", 15, context=XPathContext(root, item=root.text)) self.check_value("number(.)", 15, context=XPathContext(root)) @@ -863,6 +884,8 @@ class XPath1ParserTest(unittest.TestCase): results = select(root, "/values/*/number()", parser=self.parser.__class__) self.assertEqual(results[:3], [3.4, 20.0, -10.1]) self.assertTrue(math.isnan(results[3]) and math.isnan(results[4])) + self.check_selector("number(/values/d)", root, 44.0) + self.check_selector("number(/values/a)", root, TypeError) def test_sum_function(self): root = self.etree.XML(XML_DATA_TEST)