Split xpath_helpers module in two parts

- Datatype conversion helpers moved to token class
  - Node helpers moved to a new module
  - Kept boolean_value() for backward compatibility
This commit is contained in:
Davide Brunato 2019-08-03 22:53:37 +02:00
parent ad96ad7e7e
commit aeba3aab05
15 changed files with 406 additions and 376 deletions

View File

@ -29,9 +29,9 @@ copyright = '2018-2019, SISSA (International School for Advanced Studies)'
author = 'Davide Brunato'
# The short X.Y version
version = '1.1'
version = '1.2'
# The full version, including alpha/beta/rc tags
release = '1.1.9'
release = '1.2.0'
# -- General configuration ---------------------------------------------------

View File

@ -8,7 +8,7 @@
#
# @author Davide Brunato <brunato@sissa.it>
#
__version__ = '1.1.9'
__version__ = '1.2.0'
__author__ = "Davide Brunato"
__contact__ = "brunato@sissa.it"
__copyright__ = "Copyright 2018-2019, SISSA"
@ -23,7 +23,7 @@ from .exceptions import ElementPathError, MissingContextError, \
from . import datatypes
from .tdop_parser import Token, Parser
from .xpath_context import XPathContext, XPathSchemaContext
from .xpath_helpers import AttributeNode, NamespaceNode
from .xpath_nodes import AttributeNode, NamespaceNode
from .xpath_token import XPathToken
from .xpath1_parser import XPath1Parser
from .xpath2_constructors import XPath2Parser

View File

@ -12,7 +12,7 @@ from abc import ABCMeta, abstractmethod
from .compat import add_metaclass
from .exceptions import ElementPathTypeError, ElementPathValueError
from .namespaces import XSD_NAMESPACE
from .xpath_helpers import is_etree_element
from .xpath_nodes import is_etree_element
from .xpath_context import XPathSchemaContext

View File

@ -13,17 +13,17 @@ import math
import decimal
from .compat import PY3, string_base_type
from .exceptions import ElementPathSyntaxError, ElementPathTypeError, ElementPathNameError, \
MissingContextError
from .exceptions import ElementPathSyntaxError, ElementPathTypeError, \
ElementPathNameError, MissingContextError
from .datatypes import UntypedAtomic, DayTimeDuration, YearMonthDuration, XSD_BUILTIN_TYPES
from .xpath_context import XPathSchemaContext
from .tdop_parser import Parser, MultiLabel
from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \
XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed
from .xpath_token import XPathToken
from .xpath_helpers import AttributeNode, NamespaceNode, is_etree_element, is_xpath_node, is_element_node, \
is_document_node, is_attribute_node, is_text_node, is_comment_node, is_processing_instruction_node, \
node_name, node_string_value, boolean_value, data_value, string_value, number_value
from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, is_xpath_node, \
is_element_node, is_document_node, is_attribute_node, is_text_node, is_comment_node, \
is_processing_instruction_node, node_name, node_string_value
XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
u"\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD")
@ -687,7 +687,7 @@ def evaluate(self, context=None):
try:
next(selector)
except StopIteration:
return data_value(value)
return self.data_value(value)
else:
self.wrong_context_type("atomized operand is a sequence of length greater than one")
@ -730,7 +730,7 @@ def select(self, context=None):
isinstance(predicate[0], (int, float)):
if context.position == predicate[0] - 1:
yield context.item
elif boolean_value(predicate, self):
elif self.boolean_value(predicate):
yield context.item
@ -1008,7 +1008,7 @@ def evaluate(self, context=None):
# String functions
@method(function('string', nargs=1))
def evaluate(self, context=None):
return string_value(self.get_argument(context))
return self.string_value(self.get_argument(context))
@method(function('contains', nargs=2))
@ -1020,7 +1020,8 @@ def evaluate(self, context=None):
@method(function('concat'))
def evaluate(self, context=None):
return ''.join(string_value(self.get_argument(context, index=k)) for k in range(len(self)))
return ''.join(self.string_value(self.get_argument(context, index=k))
for k in range(len(self)))
@method(function('string-length', nargs=1))
@ -1126,12 +1127,12 @@ def evaluate(self, context=None):
# Boolean functions
@method(function('boolean', nargs=1))
def evaluate(self, context=None):
return boolean_value(self[0].get_results(context), self)
return self.boolean_value(self[0].get_results(context))
@method(function('not', nargs=1))
def evaluate(self, context=None):
return not boolean_value(self[0].get_results(context), self)
return not self.boolean_value(self[0].get_results(context))
@method(function('true', nargs=0))
@ -1179,7 +1180,8 @@ def evaluate(self, context=None):
@method(function('sum', nargs=(1, 2)))
def evaluate(self, context=None):
values = [number_value(x) if isinstance(x, UntypedAtomic) else x for x in self[0].select(context)]
values = [self.number_value(x) if isinstance(x, UntypedAtomic) else x
for x in self[0].select(context)]
if not values:
zero = 0 if len(self) == 1 else self.get_argument(context, index=1)
return [] if zero is None else zero
@ -1191,7 +1193,7 @@ def evaluate(self, context=None):
return sum(values)
try:
return sum(number_value(x) for x in values)
return sum(self.number_value(x) for x in values)
except TypeError:
if self.parser.version == '1.0':
return float('nan')
@ -1205,7 +1207,7 @@ def evaluate(self, context=None):
if arg is None:
return float('nan') if self.parser.version == '1.0' else []
elif is_xpath_node(arg) or self.parser.compatibility_mode:
arg = number_value(arg)
arg = self.number_value(arg)
if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)):
return arg
@ -1222,7 +1224,7 @@ def evaluate(self, context=None):
if arg is None:
return float('nan') if self.parser.version == '1.0' else []
elif is_xpath_node(arg) or self.parser.compatibility_mode:
arg = number_value(arg)
arg = self.number_value(arg)
if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)):
return arg

View File

@ -16,7 +16,7 @@ import codecs
from .compat import unicode_type, urlparse, URLError, string_base_type
from .exceptions import ElementPathError, xpath_error
from .xpath_helpers import is_attribute_node, boolean_value, string_value
from .xpath_nodes import is_attribute_node
from .datatypes import DateTime10, Date10, Time, XPathGregorianDay, XPathGregorianMonth, \
XPathGregorianMonthDay, XPathGregorianYear, XPathGregorianYearMonth, UntypedAtomic, Duration, \
YearMonthDuration, DayTimeDuration, WHITESPACES_PATTERN, QNAME_PATTERN, NMTOKEN_PATTERN, NAME_PATTERN, \
@ -438,7 +438,7 @@ def nud(self):
@method('boolean')
def evaluate(self, context=None):
if self.label == 'function':
return boolean_value(self[0].get_results(context), self)
return self.boolean_value(self[0].get_results(context))
# xs:boolean constructor
item = self.get_argument(context)
@ -469,7 +469,7 @@ def nud(self):
@method('string')
def evaluate(self, context=None):
if self.label == 'function':
return string_value(self.get_argument(context))
return self.string_value(self.get_argument(context))
else:
item = self.get_argument(context)
return [] if item is None else str(item)

View File

@ -24,8 +24,8 @@ from .compat import PY3, string_base_type, unicode_chr, urlparse, urljoin, urlli
from .datatypes import QNAME_PATTERN, DateTime10, Date10, Time, Timezone, Duration, DayTimeDuration
from .namespaces import prefixed_to_qname, get_namespace
from .xpath_context import XPathSchemaContext
from .xpath_helpers import is_document_node, is_xpath_node, is_element_node, is_attribute_node, \
node_name, node_string_value, node_nilled, node_base_uri, node_document_uri, data_value, string_value
from .xpath_nodes import is_document_node, is_xpath_node, is_element_node, is_attribute_node, \
node_name, node_string_value, node_nilled, node_base_uri, node_document_uri
from .xpath2_parser import XPath2Parser
method = XPath2Parser.method
@ -245,7 +245,7 @@ def evaluate(self, context=None):
@method(function('data', nargs=1))
def select(self, context=None):
for item in self[0].select(context):
value = data_value(item)
value = self.data_value(item)
if value is None:
raise self.error('FOTY0012', "argument node does not have a typed value: %r" % item)
else:
@ -380,7 +380,7 @@ def select(self, context=None):
nan = False
results = []
for item in self[0].select(context):
value = data_value(item)
value = self.data_value(item)
if context is not None:
context.item = value
if not nan and isinstance(value, float) and math.isnan(value):
@ -440,7 +440,7 @@ def select(self, context=None):
@method(function('unordered', nargs=1))
def select(self, context=None):
for result in sorted(list(self[0].select(context)), key=lambda x: string_value(x)):
for result in sorted(list(self[0].select(context)), key=lambda x: self.string_value(x)):
yield result
@ -646,7 +646,7 @@ def evaluate(self, context=None):
@method(function('string-join', nargs=2))
def evaluate(self, context=None):
items = [string_value(s) if is_element_node(s) else s
items = [self.string_value(s) if is_element_node(s) else s
for s in self[0].select(context)]
try:
return self.get_argument(context, 1, cls=string_base_type).join(items)

View File

@ -23,7 +23,7 @@ from .exceptions import ElementPathError, ElementPathTypeError, MissingContextEr
from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, XPATH_2_DEFAULT_NAMESPACES, \
XSD_NOTATION, XSD_ANY_ATOMIC_TYPE, get_namespace, qname_to_prefixed, prefixed_to_qname
from .datatypes import XSD_BUILTIN_TYPES
from .xpath_helpers import is_xpath_node, boolean_value
from .xpath_nodes import is_xpath_node
from .tdop_parser import create_tokenizer
from .xpath1_parser import XML_NCNAME_PATTERN, XPath1Parser
from .xpath_context import XPathSchemaContext
@ -257,7 +257,7 @@ class XPath2Parser(XPath1Parser):
except TypeError as err:
raise self.error('FORG0006', str(err))
def cast(value):
def cast(_value):
raise NotImplementedError
pattern = r'\b%s(?=\s*\(|\s*\(\:.*\:\)\()' % symbol
@ -413,7 +413,7 @@ def nud(self):
@method('if')
def evaluate(self, context=None):
if boolean_value(self[0].evaluate(context), self):
if self.boolean_value(self[0].evaluate(context)):
return self[1].evaluate(context)
else:
return self[2].evaluate(context)
@ -421,7 +421,7 @@ def evaluate(self, context=None):
@method('if')
def select(self, context=None):
if boolean_value(list(self[0].select(context)), self):
if self.boolean_value(list(self[0].select(context))):
for result in self[1].select(context):
yield result
else:
@ -462,7 +462,7 @@ def evaluate(self, context=None):
for results in product(*selectors):
for i in range(len(results)):
context.variables[self[i * 2][0].value] = results[i]
if boolean_value(list(self[-1].select(context.copy())), self):
if self.boolean_value(list(self[-1].select(context.copy()))):
if some:
return True
elif not some:
@ -608,8 +608,8 @@ def evaluate(self, context=None):
raise self.error('XPST0080')
namespace = get_namespace(atomic_type)
if namespace != XSD_NAMESPACE and self.parser.schema is None or \
self.parser.schema.get_type(atomic_type) is None:
if namespace != XSD_NAMESPACE and \
(self.parser.schema is None or self.parser.schema.get_type(atomic_type) is None):
self.missing_schema("type %r not found in schema" % atomic_type)
result = [res for res in self[0].select(context)]

View File

@ -11,7 +11,7 @@
import datetime
from .exceptions import ElementPathTypeError
from .xpath_helpers import AttributeNode, is_etree_element, is_element_node, \
from .xpath_nodes import AttributeNode, is_etree_element, is_element_node, \
is_document_node, is_attribute_node

View File

@ -8,258 +8,14 @@
#
# @author Davide Brunato <brunato@sissa.it>
#
"""
Helper functions for XPath nodes and basic data types.
"""
from collections import namedtuple
from .compat import PY3, urlparse
from .namespaces import XML_BASE, XSI_NIL, XSD_UNTYPED, XSD_UNTYPED_ATOMIC
from .exceptions import ElementPathValueError, xpath_error
from .datatypes import UntypedAtomic, ncname_validator
###
# Node types
AttributeNode = namedtuple('Attribute', 'name value')
"""A namedtuple-based type to represent XPath attributes."""
NamespaceNode = namedtuple('Namespace', 'prefix uri')
"""A namedtuple-based type to represent XPath namespaces."""
from .exceptions import xpath_error
from .xpath_nodes import is_element_node
###
# Utility functions for ElementTree's Element instances
def is_etree_element(obj):
return hasattr(obj, 'tag') and hasattr(obj, 'attrib') and hasattr(obj, 'text')
def elem_iter_strings(elem):
for e in elem.iter():
if e.text is not None:
yield e.text
if e.tail is not None and e is not elem:
yield e.tail
###
# XPath node test functions
#
# XPath has there are 7 kinds of nodes:
#
# element, attribute, text, namespace, processing-instruction, comment, document
#
# Element-like objects are used for representing elements and comments, ElementTree-like objects
# for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
###
def is_element_node(obj, tag=None):
"""
Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
Raises a ValueError if the argument tag has to be used but it's in a wrong format.
:param obj: the node to be tested.
:param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
are '*', '*:*', '*:local-name' and '{namespace}*'.
"""
if not is_etree_element(obj) or callable(obj.tag):
return False
elif tag is None:
return True
elif not obj.tag:
return obj.tag == tag
elif tag == '*' or tag == '*:*':
return obj.tag != ''
elif tag[0] == '*':
try:
_, name = tag.split(':')
except (ValueError, IndexError):
raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
else:
return obj.tag.split('}')[1] == name if obj.tag[0] == '{' else obj.tag == name
elif tag[-1] == '*':
if tag[0] != '{' or '}' not in tag:
raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
return obj.tag.split('}')[0][1:] == tag.split('}')[0][1:] if obj.tag[0] == '{' else False
else:
return obj.tag == tag
def is_attribute_node(obj, name=None):
"""
Returns `True` if the first argument is an attribute node matching the name, `False` otherwise.
Raises a ValueError if the argument name has to be used but it's in a wrong format.
:param obj: the node to be tested.
:param name: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
are '*', '*:*', '*:local-name' and '{namespace}*'.
"""
if name is None or name == '*' or name == '*:*':
return isinstance(obj, AttributeNode)
elif not isinstance(obj, AttributeNode):
return False
elif name[0] == '*':
try:
_, _name = name.split(':')
except (ValueError, IndexError):
raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
else:
return obj[0].split('}')[1] == _name if obj[0][0] == '{' else obj[0] == _name
elif name[-1] == '*':
if name[0] != '{' or '}' not in name:
raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
return obj[0].split('}')[0][1:] == name.split('}')[0][1:] if obj[0][0] == '{' else False
else:
return obj[0] == name
def is_comment_node(obj):
return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'Comment'
def is_processing_instruction_node(obj):
return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'ProcessingInstruction'
def is_document_node(obj):
return all(hasattr(obj, name) for name in ('getroot', 'iter', 'iterfind', 'parse'))
def is_namespace_node(obj):
return isinstance(obj, NamespaceNode)
if not PY3:
def is_text_node(obj):
return isinstance(obj, (str, unicode))
else:
def is_text_node(obj):
return isinstance(obj, str)
def is_xpath_node(obj):
return isinstance(obj, tuple) or is_etree_element(obj) or is_document_node(obj) or is_text_node(obj)
###
# Node accessors: in this implementation node accessors return None instead of empty sequence.
# Ref: https://www.w3.org/TR/xpath-datamodel-31/#dm-document-uri
def node_attributes(obj):
if is_element_node(obj):
return obj.attrib
def node_base_uri(obj):
try:
if is_element_node(obj):
return obj.attrib[XML_BASE]
elif is_document_node(obj):
return obj.getroot().attrib[XML_BASE]
except KeyError:
pass
def node_document_uri(obj):
if is_document_node(obj):
try:
uri = obj.getroot().attrib[XML_BASE]
parts = urlparse(uri)
except (KeyError, ValueError):
pass
else:
if parts.scheme and parts.netloc or parts.path.startswith('/'):
return uri
def node_children(obj):
if is_element_node(obj):
return (child for child in obj)
elif is_document_node(obj):
return (child for child in [obj.getroot()])
def node_is_id(obj):
if is_element_node(obj):
return ncname_validator(obj.text)
elif is_attribute_node(obj):
return ncname_validator(obj[1])
def node_is_idrefs(obj):
if is_element_node(obj):
return obj.text is not None and all(ncname_validator(x) for x in obj.text.split())
elif is_attribute_node(obj):
return all(ncname_validator(x) for x in obj[1].split())
def node_nilled(obj):
if is_element_node(obj):
return obj.get(XSI_NIL) in ('true', '1')
def node_kind(obj):
if is_element_node(obj):
return 'element'
elif is_attribute_node(obj):
return 'attribute'
elif is_text_node(obj):
return 'text'
elif is_document_node(obj):
return 'document'
elif is_namespace_node(obj):
return 'namespace'
elif is_comment_node(obj):
return 'comment'
elif is_processing_instruction_node(obj):
return 'processing-instruction'
def node_name(obj):
if is_element_node(obj):
return obj.tag
elif is_attribute_node(obj) or is_namespace_node(obj):
return obj[0]
def node_string_value(obj):
if is_element_node(obj):
return u''.join(elem_iter_strings(obj))
elif is_attribute_node(obj):
return obj[1]
elif is_text_node(obj):
return obj
elif is_document_node(obj):
return u''.join(e.text for e in obj.getroot().iter() if e.text is not None)
elif is_namespace_node(obj):
return obj[1]
elif is_comment_node(obj):
return obj.text
elif is_processing_instruction_node(obj):
return obj.text
def node_type_name(obj, schema=None):
if is_element_node(obj):
if schema is not None:
xsd_element = schema.get_element(obj.tag)
if xsd_element is not None:
return xsd_element.type.name
return XSD_UNTYPED
elif is_attribute_node(obj):
if schema is not None:
xsd_attribute = schema.get_attribute(obj[0])
if xsd_attribute is not None:
return xsd_attribute.type.name
return XSD_UNTYPED_ATOMIC
elif is_text_node(obj):
return XSD_UNTYPED_ATOMIC
###
# XPath base functions
def boolean_value(obj, token=None):
"""
The effective boolean value, as computed by fn:boolean().
Moved to token class but kept for backward compatibility.
"""
if isinstance(obj, list):
if not obj:
@ -280,38 +36,3 @@ def boolean_value(obj, token=None):
message="Effective boolean value is not defined for {}.".format(obj)
)
return bool(obj)
def string_value(obj):
"""
The string value, as computed by fn:string().
"""
if obj is None:
return ''
elif is_xpath_node(obj):
return node_string_value(obj)
else:
return str(obj)
def data_value(obj):
"""
The typed value, as computed by fn:data() on each item. Returns an instance of
UntypedAtomic.
"""
if obj is None:
return
elif not is_xpath_node(obj):
return obj
else:
return UntypedAtomic(node_string_value(obj))
def number_value(obj):
"""
The numeric value, as computed by fn:number() on each item. Returns a float value.
"""
try:
return float(node_string_value(obj) if is_xpath_node(obj) else obj)
except (TypeError, ValueError):
return float('nan')

264
elementpath/xpath_nodes.py Normal file
View File

@ -0,0 +1,264 @@
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
"""
Helper functions for XPath nodes and basic data types.
"""
from collections import namedtuple
from .compat import PY3, urlparse
from .namespaces import XML_BASE, XSI_NIL
from .exceptions import ElementPathValueError, xpath_error
from .datatypes import ncname_validator
###
# Node types
# Attribute node: a (name, value) pair. The namedtuple type name is 'Attribute',
# so instances are displayed as Attribute(name=..., value=...).
AttributeNode = namedtuple('Attribute', 'name value')
"""A namedtuple-based type to represent XPath attributes."""
# Namespace node: a (prefix, uri) pair. The namedtuple type name is 'Namespace'.
NamespaceNode = namedtuple('Namespace', 'prefix uri')
"""A namedtuple-based type to represent XPath namespaces."""
###
# Utility functions for ElementTree's Element instances
def is_etree_element(obj):
    """
    Duck-typing test for ElementTree-like elements: returns `True` only if the
    object exposes all the attributes `tag`, `attrib` and `text`.

    :param obj: the object to be tested.
    """
    return all(hasattr(obj, attr) for attr in ('tag', 'attrib', 'text'))
def elem_iter_strings(elem):
    """
    Yields, in document order, the text chunks contained in an element: the text
    of the element and of each descendant, plus the tail of each descendant.
    The tail of the element itself is skipped because it lies outside of it.

    :param elem: an ElementTree-like element.
    """
    for node in elem.iter():
        text, tail = node.text, node.tail
        if text is not None:
            yield text
        if node is not elem and tail is not None:
            yield tail
###
# XPath node test functions
#
# In XPath there are 7 kinds of nodes:
#
# element, attribute, text, namespace, processing-instruction, comment, document
#
# Element-like objects are used for representing elements, comments and processing
# instructions, ElementTree-like objects for documents. Named-tuples are used for
# representing attributes and namespaces, strings for text nodes.
###
def is_element_node(obj, tag=None):
    """
    Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
    Raises an ElementPathValueError if the argument tag has to be used but it's in a wrong format.

    :param obj: the node to be tested.
    :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
    are '*', '*:*', '*:local-name' and '{namespace}*'. An empty tag is compared literally, \
    so it never matches an element that has a non-empty tag.
    """
    if not is_etree_element(obj) or callable(obj.tag):
        # Comments and processing instructions have a callable tag
        return False
    elif tag is None:
        return True
    elif not obj.tag or not tag:
        # Empty names are compared literally: this also prevents an IndexError
        # on tag[0] in the wildcard branches below when tag is an empty string.
        return obj.tag == tag
    elif tag == '*' or tag == '*:*':
        return obj.tag != ''
    elif tag[0] == '*':
        # '*:local-name' wildcard: compare the local part only
        try:
            _, name = tag.split(':')
        except (ValueError, IndexError):
            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
        else:
            return obj.tag.split('}')[1] == name if obj.tag[0] == '{' else obj.tag == name
    elif tag[-1] == '*':
        # '{namespace}*' wildcard: compare the namespace part only
        if tag[0] != '{' or '}' not in tag:
            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
        return obj.tag.split('}')[0][1:] == tag.split('}')[0][1:] if obj.tag[0] == '{' else False
    else:
        return obj.tag == tag
def is_attribute_node(obj, name=None):
    """
    Returns `True` if the first argument is an attribute node matching the name, `False` otherwise.
    Raises an ElementPathValueError if the argument name has to be used but it's in a wrong format.

    :param obj: the node to be tested.
    :param name: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
    are '*', '*:*', '*:local-name' and '{namespace}*'. An empty name is compared literally.
    """
    if name is None or name == '*' or name == '*:*':
        return isinstance(obj, AttributeNode)
    elif not isinstance(obj, AttributeNode):
        return False
    elif not name:
        # Empty name: compare literally instead of failing with an IndexError
        # on name[0] in the wildcard branches below.
        return obj[0] == name
    elif name[0] == '*':
        # '*:local-name' wildcard: compare the local part only
        try:
            _, _name = name.split(':')
        except (ValueError, IndexError):
            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
        else:
            return obj[0].split('}')[1] == _name if obj[0][0] == '{' else obj[0] == _name
    elif name[-1] == '*':
        # '{namespace}*' wildcard: compare the namespace part only
        if name[0] != '{' or '}' not in name:
            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
        return obj[0].split('}')[0][1:] == name.split('}')[0][1:] if obj[0][0] == '{' else False
    else:
        return obj[0] == name
def is_comment_node(obj):
    """
    Returns `True` if the argument is an element-like object whose tag is a
    callable named 'Comment', `False` otherwise.
    """
    if not is_etree_element(obj):
        return False
    tag = obj.tag
    return callable(tag) and tag.__name__ == 'Comment'
def is_processing_instruction_node(obj):
    """
    Returns `True` if the argument is an element-like object whose tag is a
    callable named 'ProcessingInstruction', `False` otherwise.
    """
    if not is_etree_element(obj):
        return False
    tag = obj.tag
    return callable(tag) and tag.__name__ == 'ProcessingInstruction'
def is_document_node(obj):
    """
    Duck-typing test for ElementTree-like documents: returns `True` only if the
    object has all the attributes 'getroot', 'iter', 'iterfind' and 'parse'.
    """
    for attr_name in ('getroot', 'iter', 'iterfind', 'parse'):
        if not hasattr(obj, attr_name):
            return False
    return True
def is_namespace_node(obj):
    """Returns `True` if the argument is an instance of NamespaceNode."""
    result = isinstance(obj, NamespaceNode)
    return result
if PY3:
    def is_text_node(obj):
        """Returns `True` if the argument is a string, that represents a text node."""
        return isinstance(obj, str)
else:
    def is_text_node(obj):
        """Returns `True` if the argument is a byte or unicode string (Python 2)."""
        return isinstance(obj, (str, unicode))
def is_xpath_node(obj):
    """
    Returns `True` if the argument is a tuple, an element-like object,
    a document-like object or a text node, `False` otherwise.
    """
    if isinstance(obj, tuple) or is_etree_element(obj):
        return True
    return is_document_node(obj) or is_text_node(obj)
###
# Node accessors: in this implementation node accessors return None instead of empty sequence.
# Ref: https://www.w3.org/TR/xpath-datamodel-31/#dm-document-uri
def node_attributes(obj):
    """Returns the attributes mapping of an element node, `None` for other objects."""
    return obj.attrib if is_element_node(obj) else None
def node_base_uri(obj):
    """
    Returns the value of the XML_BASE attribute of an element node, or of the
    root element of a document node. Returns `None` if the attribute is missing
    or if the argument is another kind of object.
    """
    try:
        if is_element_node(obj):
            holder = obj
        elif is_document_node(obj):
            holder = obj.getroot()
        else:
            return None
        return holder.attrib[XML_BASE]
    except KeyError:
        return None
def node_document_uri(obj):
    """
    Returns the XML_BASE attribute of the root element of a document node, but only
    when the parsed URI has both a scheme and a network location or its path starts
    with '/'. Returns `None` in any other case.
    """
    if not is_document_node(obj):
        return None

    try:
        uri = obj.getroot().attrib[XML_BASE]
        parts = urlparse(uri)
    except (KeyError, ValueError):
        return None

    if (parts.scheme and parts.netloc) or parts.path.startswith('/'):
        return uri
    return None
def node_children(obj):
    """
    Returns a generator over the children of an element node, or over the root
    element of a document node. Returns `None` for other objects.
    """
    if is_element_node(obj):
        return (child for child in obj)
    if is_document_node(obj):
        root = obj.getroot()
        return (child for child in [root])
    return None
def node_is_id(obj):
    """
    Returns the result of the NCName validator applied to the text of an element
    node or to the value of an attribute node. Returns `None` for other objects.
    """
    if is_element_node(obj):
        candidate = obj.text
    elif is_attribute_node(obj):
        candidate = obj[1]
    else:
        return None
    return ncname_validator(candidate)
def node_is_idrefs(obj):
    """
    Checks if every whitespace-separated token of the text of an element node, or of
    the value of an attribute node, passes the NCName validator. An element without
    text gives `False`. Returns `None` for other objects.
    """
    if is_element_node(obj):
        text = obj.text
        if text is None:
            return False
        return all(ncname_validator(x) for x in text.split())
    if is_attribute_node(obj):
        return all(ncname_validator(x) for x in obj[1].split())
    return None
def node_nilled(obj):
    """
    Returns `True` if the XSI_NIL attribute of an element node is 'true' or '1',
    `False` otherwise. Returns `None` if the argument is not an element node.
    """
    if not is_element_node(obj):
        return None
    return obj.get(XSI_NIL) in ('true', '1')
def node_kind(obj):
    """
    Returns the name of the kind of the node, or `None` if the argument is not
    recognized as a node. Node tests are applied in a fixed order and the
    first one that matches determines the result.
    """
    kind_tests = (
        (is_element_node, 'element'),
        (is_attribute_node, 'attribute'),
        (is_text_node, 'text'),
        (is_document_node, 'document'),
        (is_namespace_node, 'namespace'),
        (is_comment_node, 'comment'),
        (is_processing_instruction_node, 'processing-instruction'),
    )
    for node_test, kind in kind_tests:
        if node_test(obj):
            return kind
    return None
def node_name(obj):
    """
    Returns the tag of an element node or the first item of an attribute or
    namespace node (the name or the prefix). Returns `None` for other objects.
    """
    if is_element_node(obj):
        return obj.tag
    if is_attribute_node(obj) or is_namespace_node(obj):
        return obj[0]
    return None
def node_string_value(obj):
    """
    Returns the string value of a node, or `None` if the argument is not
    recognized as a node.

    For element and document nodes the result is the concatenation, in document
    order, of the text chunks produced by elem_iter_strings(), so the text that
    follows a descendant element (its tail) is included for both kinds of nodes.

    :param obj: the node to be converted.
    """
    if is_element_node(obj):
        return u''.join(elem_iter_strings(obj))
    elif is_attribute_node(obj):
        return obj[1]
    elif is_text_node(obj):
        return obj
    elif is_document_node(obj):
        # Use the same text/tail walk as for elements: joining only the `text`
        # attributes drops the text that follows each descendant element.
        return u''.join(elem_iter_strings(obj.getroot()))
    elif is_namespace_node(obj):
        return obj[1]
    elif is_comment_node(obj) or is_processing_instruction_node(obj):
        return obj.text
###
# XPath base functions
def boolean_value(obj, token=None):
    """
    The effective boolean value, as computed by fn:boolean().
    Moved to token class but kept for backward compatibility.

    :param obj: the value to be converted, usually a list of results.
    :param token: optional token passed to xpath_error() for building the error.
    :raises: the error built by xpath_error() with code FORG0006 when the effective \
    boolean value is not defined for the argument.
    """
    if isinstance(obj, list):
        if not obj:
            return False

        first = obj[0]
        if isinstance(first, tuple) or is_etree_element(first) or is_document_node(first):
            # A sequence that starts with a node is true. Comments and processing
            # instructions are element-like objects too: testing them with bool()
            # would depend on the number of their children, that is always zero.
            return True
        elif len(obj) == 1:
            return bool(first)
        else:
            raise xpath_error(
                code='FORG0006', token=token, prefix=getattr(token, 'error_prefix', 'err'),
                message="Effective boolean value is not defined for a sequence of two or "
                        "more items not starting with an XPath node.",
            )
    elif isinstance(obj, tuple) or is_element_node(obj):
        raise xpath_error(
            code='FORG0006', token=token, prefix=getattr(token, 'error_prefix', 'err'),
            message="Effective boolean value is not defined for {}.".format(obj)
        )
    return bool(obj)

View File

@ -25,9 +25,9 @@ import contextlib
from .compat import string_base_type
from .exceptions import xpath_error
from .namespaces import XQT_ERRORS_NAMESPACE
from .xpath_helpers import AttributeNode, is_etree_element, is_document_node, boolean_value, \
string_value, data_value, number_value
from .datatypes import UntypedAtomic, Timezone, DayTimeDuration
from .xpath_nodes import AttributeNode, is_etree_element, \
is_element_node, is_document_node, is_xpath_node, node_string_value
from .datatypes import UntypedAtomic, Timezone, DayTimeDuration, XSD_BUILTIN_TYPES
from .tdop_parser import Token
@ -167,12 +167,12 @@ class XPathToken(Token):
if cls is not None and not isinstance(item, cls):
if self.parser.compatibility_mode:
if issubclass(cls, string_base_type):
return string_value(item)
return self.string_value(item)
elif issubclass(cls, float):
return number_value(item)
return self.number_value(item)
if self.parser.version > '1.0':
value = data_value(item)
value = self.data_value(item)
if isinstance(value, UntypedAtomic):
try:
return str(value) if issubclass(cls, string_base_type) else cls(value)
@ -193,7 +193,7 @@ class XPathToken(Token):
:param context: the XPath context.
"""
for item in self.select(context):
value = data_value(item)
value = self.data_value(item)
if value is None:
raise self.error('FOTY0012', "argument node does not have a typed value: {}".format(item))
else:
@ -240,21 +240,22 @@ class XPathToken(Token):
try:
if isinstance(operand1[0], bool):
if len(operand1) == 1:
return [(operand1[0], boolean_value(operand2, self))]
return [(operand1[0], self.boolean_value(operand2))]
if isinstance(operand2[0], bool):
if len(operand2) == 1:
return [(boolean_value(operand1, self), operand2[0])]
return [(self.boolean_value(operand1), operand2[0])]
except IndexError:
return []
# Converts to float for lesser-greater operators (3.)
if self.symbol in ('<', '<=', '>', '>='):
return [
(float(data_value(value1)), float(data_value(value2)))
(float(self.data_value(value1)), float(self.data_value(value2)))
for value1 in operand1 for value2 in operand2
]
return [(data_value(value1), data_value(value2)) for value1 in operand1 for value2 in operand2]
return [(self.data_value(value1), self.data_value(value2))
for value1 in operand1 for value2 in operand2]
def get_results(self, context):
"""
@ -355,6 +356,68 @@ class XPathToken(Token):
finally:
locale.setlocale(locale.LC_ALL, default_locale)
###
# XPath data conversion base functions
def data_value(self, obj):
    """
    The typed value, as computed by fn:data() on each item.

    :param obj: the item to be atomized.
    :return: `None` if the item is `None`, the item itself if it's not an XPath node, \
    an UntypedAtomic instance built on the string value for nodes that don't have a \
    `type` attribute. For nodes with a simple type returns the sample value of the \
    related primitive type, otherwise returns `None`.
    """
    if obj is None:
        return None
    if not is_xpath_node(obj):
        return obj
    if not hasattr(obj, 'type'):
        return UntypedAtomic(node_string_value(obj))
    if obj.type.is_simple():
        # In case of a schema element or attribute use the sample value
        # of the primitive type.
        primitive_type = self.parser.schema.get_primitive_type(obj.type)
        return XSD_BUILTIN_TYPES[primitive_type.local_name].value
    return None
def boolean_value(self, obj):
    """
    The effective boolean value, as computed by fn:boolean().

    :param obj: the value to be converted, usually a list of results.
    :raises: the error built by self.error() with code FORG0006 when the effective \
    boolean value is not defined for the argument.
    """
    if isinstance(obj, list):
        if not obj:
            return False

        first = obj[0]
        if isinstance(first, tuple) or is_etree_element(first) or is_document_node(first):
            # A sequence that starts with a node is true. Comments and processing
            # instructions are element-like objects too: testing them with bool()
            # would depend on the number of their children, that is always zero.
            return True
        elif len(obj) == 1:
            return bool(first)
        else:
            raise self.error(
                code='FORG0006',
                message="Effective boolean value is not defined for a sequence of two or "
                        "more items not starting with an XPath node.",
            )
    elif isinstance(obj, tuple) or is_element_node(obj):
        raise self.error('FORG0006', "Effective boolean value is not defined for {}.".format(obj))
    return bool(obj)
@staticmethod
def string_value(obj):
"""
The string value, as computed by fn:string().
"""
if obj is None:
return ''
elif is_xpath_node(obj):
return node_string_value(obj)
else:
return str(obj)
@staticmethod
def number_value(obj):
    """
    The numeric value, as computed by fn:number() on each item. An XPath node
    is converted through its string value. Returns a float value, that is NaN
    when the conversion fails with a TypeError or a ValueError.
    """
    try:
        if is_xpath_node(obj):
            obj = node_string_value(obj)
        return float(obj)
    except (TypeError, ValueError):
        return float('nan')
###
# Error handling helpers
def error(self, code, message=None):

View File

@ -15,7 +15,7 @@ with open("README.rst") as readme:
setup(
name='elementpath',
version='1.1.9',
version='1.2.0',
packages=['elementpath'],
author='Davide Brunato',
author_email='brunato@sissa.it',

View File

@ -24,7 +24,7 @@ import unittest
if __name__ == '__main__':
try:
from tests.test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, XPathHelpersTest
from tests.test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, NodeHelpersTest
from tests.test_datatypes import UntypedAtomicTest, DateTimeTypesTest, DurationTypesTest, TimezoneTypeTest
from tests.test_context import XPathContextTest
from tests.test_xpath1_parser import XPath1ParserTest, LxmlXPath1ParserTest
@ -34,7 +34,7 @@ if __name__ == '__main__':
from tests.test_package import PackageTest
except ImportError:
# Python 2 fallback
from test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, XPathHelpersTest
from test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, NodeHelpersTest
from test_datatypes import UntypedAtomicTest, DateTimeTypesTest, DurationTypesTest, TimezoneTypeTest
from test_context import XPathContextTest
from test_xpath1_parser import XPath1ParserTest, LxmlXPath1ParserTest

View File

@ -12,17 +12,16 @@
from __future__ import unicode_literals
import unittest
import io
import math
import xml.etree.ElementTree as ElementTree
from xmlschema import XMLSchema
from elementpath.exceptions import ElementPathError, xpath_error
from elementpath.schema_proxy import XMLSchemaProxy
from elementpath.namespaces import XSD_NAMESPACE, get_namespace, qname_to_prefixed, prefixed_to_qname
from elementpath.xpath_helpers import AttributeNode, NamespaceNode, is_etree_element, is_element_node, \
is_attribute_node, is_comment_node, is_document_node, is_namespace_node, is_processing_instruction_node, \
is_text_node, node_attributes, node_base_uri, node_document_uri, node_children, node_is_id, node_is_idrefs, \
node_nilled, node_kind, node_name, node_string_value, node_type_name, boolean_value, data_value, number_value
from elementpath.namespaces import XSD_NAMESPACE, get_namespace, qname_to_prefixed, \
prefixed_to_qname
from elementpath.xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \
is_element_node, is_attribute_node, is_comment_node, is_document_node, \
is_namespace_node, is_processing_instruction_node, is_text_node, node_attributes, \
node_base_uri, node_document_uri, node_children, node_is_id, node_is_idrefs, \
node_nilled, node_kind, node_name, node_string_value
from elementpath.xpath1_parser import XPath1Parser
@ -78,7 +77,7 @@ class NamespaceHelpersTest(unittest.TestCase):
prefixed_to_qname('foo:', {'': 'ns'})
class XPathHelpersTest(unittest.TestCase):
class NodeHelpersTest(unittest.TestCase):
elem = ElementTree.XML('<node a1="10"/>')
def test_is_etree_element_function(self):
@ -245,40 +244,6 @@ class XPathHelpersTest(unittest.TestCase):
self.assertIsNone(node_string_value(None))
self.assertIsNone(node_string_value(10))
def test_node_type_name_function(self):
    """
    Checks the type names returned by node_type_name() for elements,
    attribute nodes and other values, with and without a schema proxy.
    """
    xsd_source = """<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:attribute name="slot" type="xs:token" />
<xs:element name="frame" type="xs:decimal" />
</xs:schema>"""
    schema_proxy = XMLSchemaProxy(XMLSchema(xsd_source))

    # Element matching a schema declaration: typed only when a schema is given.
    declared_elem = ElementTree.Element('frame')
    self.assertEqual(
        node_type_name(declared_elem, schema_proxy),
        '{http://www.w3.org/2001/XMLSchema}decimal'
    )
    self.assertEqual(
        node_type_name(declared_elem),
        '{http://www.w3.org/2001/XMLSchema}untyped'
    )

    # Element without a matching declaration.
    undeclared_elem = ElementTree.Element('alpha')
    self.assertEqual(
        node_type_name(undeclared_elem, schema_proxy),
        '{http://www.w3.org/2001/XMLSchema}untyped'
    )

    # Attribute matching a schema declaration: typed only when a schema is given.
    declared_attr = AttributeNode('slot', 'x1')
    self.assertEqual(
        node_type_name(declared_attr, schema_proxy),
        '{http://www.w3.org/2001/XMLSchema}token'
    )
    self.assertEqual(
        node_type_name(declared_attr),
        '{http://www.w3.org/2001/XMLSchema}untypedAtomic'
    )

    # Attribute without a matching declaration.
    undeclared_attr = AttributeNode('alpha', 'x1')
    self.assertEqual(
        node_type_name(undeclared_attr, schema_proxy),
        '{http://www.w3.org/2001/XMLSchema}untypedAtomic'
    )

    # A plain string and an integer passed without a schema.
    self.assertEqual(
        node_type_name('slot'),
        '{http://www.w3.org/2001/XMLSchema}untypedAtomic'
    )
    self.assertIsNone(node_type_name(10))
def test_boolean_value_function(self):
    """Checks that boolean_value() raises a TypeError for an element."""
    element = ElementTree.Element('A')
    self.assertRaises(TypeError, boolean_value, element)
def test_data_value_function(self):
    """Checks that data_value() returns `None` when called with `None`."""
    result = data_value(None)
    self.assertIsNone(result)
def test_number_value_function(self):
    """
    Checks number_value() with a numeric string and with a string
    that is not a number, for which a NaN is expected.
    """
    converted = number_value("19")
    self.assertEqual(converted, 19)

    not_numeric = number_value("not a number")
    self.assertTrue(math.isnan(not_numeric))
if __name__ == '__main__':
unittest.main()

View File

@ -208,6 +208,21 @@ class XPath1ParserTest(unittest.TestCase):
else:
self.assertTrue(expected(results))
def test_boolean_value_function(self):
    """
    Checks that the boolean_value() method of a parsed token raises
    a TypeError when it is called with an element.
    """
    root_token = self.parser.parse('true()')
    element = ElementTree.Element('A')
    self.assertRaises(TypeError, root_token.boolean_value, element)
def test_data_value_function(self):
    """
    Checks that the data_value() method of a parsed token returns
    `None` when it is called with `None`.
    """
    root_token = self.parser.parse('true()')
    result = root_token.data_value(None)
    self.assertIsNone(result)
def test_number_value_function(self):
    """
    Checks the number_value() method of a parsed token with a numeric
    string and with a string that is not a number (NaN expected).
    """
    root_token = self.parser.parse('true()')

    converted = root_token.number_value("19")
    self.assertEqual(converted, 19)

    not_numeric = root_token.number_value("not a number")
    self.assertTrue(math.isnan(not_numeric))
# Wrong XPath expression checker shortcuts
def wrong_syntax(self, path):
    """
    Asserts that parsing the given path raises a SyntaxError.

    :param path: the XPath expression passed to the parser of the test case.
    """
    with self.assertRaises(SyntaxError):
        self.parser.parse(path)