Extend fn:string() to schema nodes

- Removed node_string_value(), replaced entirely by the method
    string_value() of the class XPathToken.
This commit is contained in:
Davide Brunato 2019-08-13 18:06:49 +02:00
parent 116e43fc4a
commit bc78682fe8
9 changed files with 134 additions and 76 deletions

View File

@ -47,12 +47,14 @@ XSI_NONS_SCHEMA_LOCATION = '{%s}schemaLocation' % XSI_NAMESPACE
# XML Schema types
XSD_NOTATION = '{%s}NOTATION' % XSD_NAMESPACE
XSD_ANY_ATOMIC_TYPE = '{%s}anyAtomicType' % XSD_NAMESPACE
XSD_UNTYPED = '{%s}untyped' % XSD_NAMESPACE
XSD_UNTYPED_ATOMIC = '{%s}untypedAtomic' % XSD_NAMESPACE
XSD_ID = '{%s}ID' % XSD_NAMESPACE
XSD_IDREF = '{%s}IDREF' % XSD_NAMESPACE
XSD_IDREFS = '{%s}IDREFS' % XSD_NAMESPACE
# XPath type labels defined in XSD namespace that are not XSD builtin types
XSD_UNTYPED = '{%s}untyped' % XSD_NAMESPACE
XSD_UNTYPED_ATOMIC = '{%s}untypedAtomic' % XSD_NAMESPACE
def get_namespace(name):
try:

View File

@ -21,9 +21,9 @@ from .tdop_parser import Parser, MultiLabel
from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \
XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed
from .xpath_token import XPathToken
from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, is_xpath_node, \
is_element_node, is_document_node, is_attribute_node, is_text_node, is_comment_node, \
is_processing_instruction_node, node_name, node_string_value
from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \
is_xpath_node, is_element_node, is_document_node, is_attribute_node, \
is_text_node, is_comment_node, is_processing_instruction_node, node_name
XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
u"\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD")
@ -1200,7 +1200,7 @@ def evaluate(self, context=None):
def evaluate(self, context=None):
arg = self.get_argument(context, default_to_context=True)
try:
return float(node_string_value(arg) if is_xpath_node(arg) else arg)
return float(self.string_value(arg) if is_xpath_node(arg) else arg)
except (TypeError, ValueError):
return float('nan')

View File

@ -212,36 +212,50 @@ def cast(value):
# Constructors for datetime XSD types
@constructor('date')
def cast(value, tz=None):
    """
    Constructor body for the 'date' type.

    :param value: a Date10 instance, returned unchanged, or a value \
    that is handed to Date10.fromstring().
    :param tz: passed as `tzinfo` to Date10.fromstring(); not used when \
    the value is already a Date10 instance.
    """
    return value if isinstance(value, Date10) else Date10.fromstring(value, tzinfo=tz)
@constructor('gDay')
def cast(value, tz=None):
    """
    Constructor body for the 'gDay' type.

    :param value: an XPathGregorianDay instance, returned unchanged, or a \
    value that is handed to XPathGregorianDay.fromstring().
    :param tz: passed as `tzinfo` to the parser; not used when the value \
    is already an XPathGregorianDay instance.
    """
    return (
        value if isinstance(value, XPathGregorianDay)
        else XPathGregorianDay.fromstring(value, tzinfo=tz)
    )
@constructor('gMonth')
def cast(value, tz=None):
    """
    Constructor body for the 'gMonth' type.

    :param value: an XPathGregorianMonth instance, returned unchanged, or a \
    value that is handed to XPathGregorianMonth.fromstring().
    :param tz: passed as `tzinfo` to the parser; not used when the value \
    is already an XPathGregorianMonth instance.
    """
    return (
        value if isinstance(value, XPathGregorianMonth)
        else XPathGregorianMonth.fromstring(value, tzinfo=tz)
    )
@constructor('gMonthDay')
def cast(value, tz=None):
    """
    Constructor body for the 'gMonthDay' type.

    :param value: an XPathGregorianMonthDay instance, returned unchanged, or \
    a value that is handed to XPathGregorianMonthDay.fromstring().
    :param tz: passed as `tzinfo` to the parser; not used when the value \
    is already an XPathGregorianMonthDay instance.
    """
    return (
        value if isinstance(value, XPathGregorianMonthDay)
        else XPathGregorianMonthDay.fromstring(value, tzinfo=tz)
    )
@constructor('gYear')
def cast(value, tz=None):
    """
    Constructor body for the 'gYear' type.

    :param value: an XPathGregorianYear instance, returned unchanged, or a \
    value that is handed to XPathGregorianYear.fromstring().
    :param tz: passed as `tzinfo` to the parser; not used when the value \
    is already an XPathGregorianYear instance.
    """
    return (
        value if isinstance(value, XPathGregorianYear)
        else XPathGregorianYear.fromstring(value, tzinfo=tz)
    )
@constructor('gYearMonth')
def cast(value, tz=None):
    """
    Constructor body for the 'gYearMonth' type.

    :param value: an XPathGregorianYearMonth instance, returned unchanged, \
    or a value that is handed to XPathGregorianYearMonth.fromstring().
    :param tz: passed as `tzinfo` to the parser; not used when the value \
    is already an XPathGregorianYearMonth instance.
    """
    return (
        value if isinstance(value, XPathGregorianYearMonth)
        else XPathGregorianYearMonth.fromstring(value, tzinfo=tz)
    )
@constructor('time')
def cast(value, tz=None):
    """
    Constructor body for the 'time' type.

    :param value: a Time instance, returned unchanged, or a value that \
    is handed to Time.fromstring().
    :param tz: passed as `tzinfo` to Time.fromstring(); not used when the \
    value is already a Time instance.
    """
    return value if isinstance(value, Time) else Time.fromstring(value, tzinfo=tz)

View File

@ -24,8 +24,8 @@ from .compat import PY3, string_base_type, unicode_chr, urlparse, urljoin, urlli
from .datatypes import QNAME_PATTERN, DateTime10, Date10, Time, Timezone, Duration, DayTimeDuration
from .namespaces import prefixed_to_qname, get_namespace
from .xpath_context import XPathSchemaContext
from .xpath_nodes import is_document_node, is_xpath_node, is_element_node, is_attribute_node, \
node_name, node_string_value, node_nilled, node_base_uri, node_document_uri
from .xpath_nodes import is_document_node, is_xpath_node, is_element_node, \
is_attribute_node, node_name, node_nilled, node_base_uri, node_document_uri
from .xpath2_parser import XPath2Parser
method = XPath2Parser.method
@ -304,7 +304,7 @@ def evaluate(self, context=None):
return item
try:
return abs(node_string_value(item) if is_xpath_node(item) else item)
return abs(self.string_value(item) if is_xpath_node(item) else item)
except TypeError as err:
self.wrong_type(str(err))

View File

@ -19,10 +19,12 @@ import math
import operator
from .compat import MutableSequence, urlparse
from .exceptions import ElementPathError, ElementPathTypeError, MissingContextError
from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, XPATH_2_DEFAULT_NAMESPACES, \
XSD_NOTATION, XSD_ANY_ATOMIC_TYPE, get_namespace, qname_to_prefixed, prefixed_to_qname
from .datatypes import XSD_BUILTIN_TYPES
from .exceptions import ElementPathError, ElementPathKeyError, \
ElementPathTypeError, MissingContextError
from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, \
XPATH_2_DEFAULT_NAMESPACES, XSD_NOTATION, XSD_ANY_ATOMIC_TYPE, get_namespace, \
qname_to_prefixed, prefixed_to_qname, XSD_UNTYPED_ATOMIC
from .datatypes import UntypedAtomic, XSD_BUILTIN_TYPES
from .xpath_nodes import is_xpath_node
from .tdop_parser import create_tokenizer
from .xpath1_parser import XML_NCNAME_PATTERN, XPath1Parser
@ -324,10 +326,16 @@ class XPath2Parser(XPath1Parser):
}
def is_instance(self, obj, type_qname):
if self.schema is not None:
if type_qname == XSD_UNTYPED_ATOMIC:
return isinstance(obj, UntypedAtomic)
elif self.schema is not None:
return self.schema.is_instance(obj, type_qname)
local_name = type_qname.split('}')[1]
return XSD_BUILTIN_TYPES[local_name].validator(obj)
try:
return XSD_BUILTIN_TYPES[local_name].validator(obj)
except KeyError:
raise ElementPathKeyError("unknown type %r" % type_qname)
def parse(self, source):
root_token = super(XPath1Parser, self).parse(source)

View File

@ -15,7 +15,7 @@ from collections import namedtuple
from .compat import PY3, urlparse
from .namespaces import XML_BASE, XSI_NIL
from .exceptions import ElementPathValueError, xpath_error
from .exceptions import ElementPathValueError
from .datatypes import ncname_validator
###
@ -111,6 +111,10 @@ def is_attribute_node(obj, name=None):
return obj[0] == name
def is_schema_node(obj):
    """
    Duck-typing check for schema nodes: an object qualifies when it exposes
    all three attributes `name`, `local_name` and `type`.

    :param obj: any object.
    :returns: `True` if the three attributes are all present, `False` otherwise.
    """
    return all(hasattr(obj, attr) for attr in ('name', 'local_name', 'type'))
def is_comment_node(obj):
    """
    Tells whether the object is a comment node, that is an etree element
    whose `tag` is a callable named 'Comment'.

    :param obj: any object.
    """
    if not is_etree_element(obj):
        return False
    tag = obj.tag
    return callable(tag) and tag.__name__ == 'Comment'
@ -136,7 +140,8 @@ else:
def is_xpath_node(obj):
return isinstance(obj, tuple) or is_etree_element(obj) or is_document_node(obj) or is_text_node(obj)
return isinstance(obj, tuple) or is_etree_element(obj) or \
is_document_node(obj) or is_text_node(obj) or is_schema_node(obj)
###
@ -217,21 +222,3 @@ def node_name(obj):
return obj.tag
elif is_attribute_node(obj) or is_namespace_node(obj):
return obj[0]
def node_string_value(obj):
    """
    Returns the string-value of an XPath node.

    :param obj: an element, attribute, text, document, namespace, comment \
    or processing instruction node.
    :returns: the string-value of the node, or `None` if the argument is \
    not one of the node kinds listed above.
    """
    if is_element_node(obj):
        return u''.join(elem_iter_strings(obj))
    elif is_attribute_node(obj):
        return obj[1]
    elif is_text_node(obj):
        return obj
    elif is_document_node(obj):
        root = obj.getroot()
        if root is None:
            # A tree without a root element has no text node descendants.
            return u''

        # The string-value of a document node is the concatenation of all
        # its text node descendants. In an ElementTree these are stored in
        # both the `text` and the `tail` attributes, so tails must be
        # collected as well or text that follows a child element is lost.
        chunks = []
        for e in root.iter():
            # Comments and processing instructions have a callable tag:
            # their own content is not a text node, but their tail is.
            if not callable(e.tag) and e.text is not None:
                chunks.append(e.text)
            # The tail of the root lies outside the document's content.
            if e is not root and e.tail is not None:
                chunks.append(e.tail)
        return u''.join(chunks)
    elif is_namespace_node(obj):
        return obj[1]
    elif is_comment_node(obj):
        return obj.text
    elif is_processing_instruction_node(obj):
        return obj.text

View File

@ -26,8 +26,10 @@ from decimal import Decimal
from .compat import string_base_type
from .exceptions import xpath_error
from .namespaces import XQT_ERRORS_NAMESPACE
from .xpath_nodes import AttributeNode, is_etree_element, \
is_element_node, is_document_node, is_xpath_node, node_string_value
from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \
elem_iter_strings, is_text_node, is_namespace_node, is_comment_node, \
is_processing_instruction_node, is_element_node, is_document_node, \
is_xpath_node, is_schema_node
from .datatypes import UntypedAtomic, Timezone, DayTimeDuration, XSD_BUILTIN_TYPES
from .tdop_parser import Token
@ -351,17 +353,35 @@ class XPathToken(Token):
:param name: a not empty string.
:returns: the matched XSD type or `None` if there isn't a match.
"""
if name[0] != '{' and self.parser.default_namespace:
name = '{%s}%s' % (self.parser.default_namespace, name)
if isinstance(schema_item, AttributeNode):
if not schema_item[1].is_matching(name, self.parser.default_namespace):
if not schema_item[1].is_matching(name):
return
xsd_type = schema_item[1].type
try:
xsd_type = schema_item[1].type
except AttributeError:
try:
xsd_type = self.parser.schema.get_attribute(name).type
except AttributeError:
return
elif is_etree_element(schema_item):
if hasattr(schema_item, 'is_matching'):
if not schema_item.is_matching(name, self.parser.default_namespace):
return
elif schema_item.tag != name:
return
xsd_type = schema_item.type
try:
xsd_type = schema_item.type
except AttributeError:
try:
xsd_type = self.parser.schema.get_element(name).type
except AttributeError:
return
else:
return
@ -397,15 +417,10 @@ class XPathToken(Token):
return
elif not is_xpath_node(obj):
return obj
elif not hasattr(obj, 'type'):
return UntypedAtomic(node_string_value(obj))
elif obj.type.is_simple():
# In case of a schema element or attribute use the sample value
# of the primitive type
primitive_type = self.parser.schema.get_primitive_type(obj.type)
return XSD_BUILTIN_TYPES[primitive_type.local_name].value
elif obj.type.local_name == 'anyType':
return XSD_BUILTIN_TYPES['anyType'].value
elif hasattr(obj, 'type'):
return self.schema_node_value(obj)
else:
return UntypedAtomic(self.string_value(obj))
def boolean_value(self, obj):
"""
@ -428,28 +443,56 @@ class XPathToken(Token):
raise self.error('FORG0006', "Effective boolean value is not defined for {}.".format(obj))
return bool(obj)
@staticmethod
def string_value(obj):
def string_value(self, obj):
"""
The string value, as computed by fn:string().
"""
if obj is None:
return ''
elif is_xpath_node(obj):
return node_string_value(obj)
elif is_element_node(obj):
return u''.join(elem_iter_strings(obj))
elif is_attribute_node(obj):
return obj[1]
elif is_text_node(obj):
return obj
elif is_document_node(obj):
return u''.join(e.text for e in obj.getroot().iter() if e.text is not None)
elif is_namespace_node(obj):
return obj[1]
elif is_comment_node(obj):
return obj.text
elif is_processing_instruction_node(obj):
return obj.text
elif is_schema_node(obj):
return str(self.schema_node_value(obj))
else:
return str(obj)
@staticmethod
def number_value(obj):
def number_value(self, obj):
"""
The numeric value, as computed by fn:number() on each item. Returns a float value.
"""
try:
return float(node_string_value(obj) if is_xpath_node(obj) else obj)
return float(self.string_value(obj) if is_xpath_node(obj) else obj)
except (TypeError, ValueError):
return float('nan')
def schema_node_value(self, obj):
    """
    Provides a sample typed value for an XSD schema node, used when XPath
    expressions are evaluated against a schema instead of XML data.

    :param obj: a schema node, i.e. an object with a `type` attribute.
    :returns: for a simple type the `value` registered in XSD_BUILTIN_TYPES \
    for its primitive type; for 'anyType' the `value` registered for \
    'anyType'; `None` for any other type.
    :raises: the error produced by `self.wrong_type()` if an AttributeError \
    occurs while inspecting the node.
    """
    try:
        xsd_type = obj.type
        if xsd_type.is_simple():
            # For a schema element or attribute with a simple type use
            # the sample value of the related primitive type.
            primitive_type = self.parser.schema.get_primitive_type(xsd_type)
            return XSD_BUILTIN_TYPES[primitive_type.local_name].value
        if xsd_type.local_name == 'anyType':
            return XSD_BUILTIN_TYPES['anyType'].value
    except AttributeError:
        raise self.wrong_type("the argument %r is not a node of an XSD schema" % obj)
    return None
###
# Error handling helpers
def error(self, code, message=None):

View File

@ -21,7 +21,7 @@ from elementpath.xpath_nodes import AttributeNode, NamespaceNode, is_etree_eleme
is_element_node, is_attribute_node, is_comment_node, is_document_node, \
is_namespace_node, is_processing_instruction_node, is_text_node, node_attributes, \
node_base_uri, node_document_uri, node_children, node_is_id, node_is_idrefs, \
node_nilled, node_kind, node_name, node_string_value
node_nilled, node_kind, node_name
from elementpath.xpath_helpers import boolean_value
from elementpath.xpath1_parser import XPath1Parser
@ -227,24 +227,6 @@ class NodeHelpersTest(unittest.TestCase):
self.assertEqual(node_name(attr), 'a1')
self.assertEqual(node_name(namespace), 'xs')
def test_node_string_value_function(self):
    """Checks node_string_value() on each node kind and on non-node values."""
    xml_source = io.StringIO(u'<A>123<B1>456</B1><B2>789</B2></A>')

    # Pairs of (node, expected string-value), one for each kind of node.
    cases = [
        (ElementTree.parse(xml_source), '123456789'),
        (ElementTree.Element('schema'), ''),
        (AttributeNode('id', '0212349350'), '0212349350'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'),
         'http://www.w3.org/2001/XMLSchema'),
        (ElementTree.Comment('nothing important'), 'nothing important'),
        (ElementTree.ProcessingInstruction('action', 'nothing to do'),
         'action nothing to do'),
        (u'betelgeuse', 'betelgeuse'),
    ]
    for node, expected in cases:
        self.assertEqual(node_string_value(node), expected)

    # Objects that are not XPath nodes have no string-value.
    for non_node in (None, 10):
        self.assertIsNone(node_string_value(non_node))
class CompatibilityHelpersTest(unittest.TestCase):
@ -263,6 +245,5 @@ class CompatibilityHelpersTest(unittest.TestCase):
self.assertTrue(boolean_value(1))
if __name__ == '__main__':
unittest.main()

View File

@ -221,6 +221,26 @@ class XPath1ParserTest(unittest.TestCase):
token = self.parser.parse('true()')
self.assertIsNone(token.data_value(None))
def test_string_value_function(self):
    """Checks the token's string_value() on each node kind and on non-node values."""
    token = self.parser.parse('true()')
    xml_source = io.StringIO(u'<A>123<B1>456</B1><B2>789</B2></A>')

    # Pairs of (item, expected string value): the XPath node kinds first,
    # then the values that are not nodes.
    cases = [
        (ElementTree.parse(xml_source), '123456789'),
        (ElementTree.Element('schema'), ''),
        (AttributeNode('id', '0212349350'), '0212349350'),
        (NamespaceNode('xs', 'http://www.w3.org/2001/XMLSchema'),
         'http://www.w3.org/2001/XMLSchema'),
        (ElementTree.Comment('nothing important'), 'nothing important'),
        (ElementTree.ProcessingInstruction('action', 'nothing to do'),
         'action nothing to do'),
        (u'betelgeuse', 'betelgeuse'),
        (None, ''),
        (10, '10'),
    ]
    for item, expected in cases:
        self.assertEqual(token.string_value(item), expected)
def test_number_value_function(self):
token = self.parser.parse('true()')
self.assertEqual(token.number_value("19"), 19)
@ -845,6 +865,7 @@ class XPath1ParserTest(unittest.TestCase):
def test_number_function(self):
root = self.etree.XML('<root>15</root>')
self.check_value("number()", MissingContextError)
self.check_value("number()", 15, context=XPathContext(root))
self.check_value("number()", 15, context=XPathContext(root, item=root.text))
self.check_value("number(.)", 15, context=XPathContext(root))
@ -863,6 +884,8 @@ class XPath1ParserTest(unittest.TestCase):
results = select(root, "/values/*/number()", parser=self.parser.__class__)
self.assertEqual(results[:3], [3.4, 20.0, -10.1])
self.assertTrue(math.isnan(results[3]) and math.isnan(results[4]))
self.check_selector("number(/values/d)", root, 44.0)
self.check_selector("number(/values/a)", root, TypeError)
def test_sum_function(self):
root = self.etree.XML(XML_DATA_TEST)