debian-elementpath/elementpath/xpath_token.py

561 lines
21 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
"""
XPathToken and helper functions for XPath nodes. XPath error messages and node helper functions
are embedded in XPathToken class, in order to raise errors related to token instances.
In XPath there are 7 kinds of nodes:
element, attribute, text, namespace, processing-instruction, comment, document
Element-like objects are used for representing elements and comments, ElementTree-like objects
for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
"""
import locale
import contextlib
from decimal import Decimal
from .compat import string_base_type
from .exceptions import xpath_error
from .namespaces import XQT_ERRORS_NAMESPACE
from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \
elem_iter_strings, is_text_node, is_namespace_node, is_comment_node, \
is_processing_instruction_node, is_element_node, is_document_node, \
is_xpath_node, is_schema_node
from .datatypes import UntypedAtomic, Timezone, DayTimeDuration, XSD_BUILTIN_TYPES
from .tdop_parser import Token
def ordinal(n):
least_significant_digit = n % 10
if least_significant_digit == 1:
return '%dst' % n
elif least_significant_digit == 2:
return '%dnd' % n
elif least_significant_digit == 3:
return '%drd' % n
else:
return '%dth' % n
class XPathToken(Token):
"""Base class for XPath tokens."""
comment = None # for XPath 2.0+ comments
xsd_type = None # fox XPath 2.0+ schema types labeling
def evaluate(self, context=None):
"""
Evaluate default method for XPath tokens.
:param context: The XPath dynamic context.
"""
return list(self.select(context))
def select(self, context=None):
"""
Select operator that generates XPath results.
:param context: The XPath dynamic context.
"""
item = self.evaluate(context)
if item is not None:
if isinstance(item, list):
for _item in item:
yield _item
else:
if context is not None:
context.item = item
yield item
def __str__(self):
symbol, label = self.symbol, self.label
if symbol == '$':
return '$%s variable reference' % (self[0].value if self else '')
elif symbol == ',':
return 'comma operator'
elif label == 'function':
return '%r function' % symbol
elif label == 'axis':
return '%r axis' % symbol
return super(XPathToken, self).__str__()
@property
def source(self):
symbol, label = self.symbol, self.label
if label == 'axis':
return u'%s::%s' % (self.symbol, self[0].source)
elif label in ('function', 'constructor'):
return u'%s(%s)' % (self.symbol, ', '.join(item.source for item in self))
elif symbol == ':':
return u'%s:%s' % (self[0].source, self[1].source)
elif symbol == '(':
return '()' if not self else u'(%s)' % self[0].source
elif symbol == ',':
return u'%s, %s' % (self[0].source, self[1].source)
elif symbol == '$':
return u'$%s' % self[0].source
elif symbol == '{':
return u'{%s}%s' % (self.value, self[0].source)
elif symbol == 'instance':
return u'%s instance of %s' % (self[0].source, ''.join(t.source for t in self[1:]))
elif symbol == 'treat':
return u'%s treat as %s' % (self[0].source, ''.join(t.source for t in self[1:]))
return super(XPathToken, self).source
@property
def error_prefix(self):
for prefix, ns in self.parser.namespaces.items():
if ns == XQT_ERRORS_NAMESPACE:
return prefix
else:
return 'err'
###
# Helper methods
def get_argument(self, context, index=0, required=False, default_to_context=False,
default=None, cls=None):
"""
Get the argument value of a function of constructor token. A zero length sequence is
converted to a `None` value. If the function has no argument returns the context's
item if the dynamic context is not `None`.
:param context: the dynamic context.
:param index: an index for select the argument to be got, the first for default.
:param required: if set to `True` missing or empty sequence arguments are not allowed.
:param default_to_context: if set to `True` then the item of the dynamic context is \
returned when the argument is missing.
:param default: the default value returned in case the argument is an empty sequence. \
If not provided returns `None`.
:param cls: if a type is provided performs a type checking on item.
"""
try:
selector = self[index].select
except IndexError:
if default_to_context:
if context is None:
self.missing_context()
item = context.item if context.item is not None else context.root
elif required:
raise self.error('XPST0017', "Missing %s argument" % ordinal(index + 1))
else:
return
else:
item = None
for k, result in enumerate(selector(context)):
if k == 0:
item = result
elif not self.parser.compatibility_mode:
self.wrong_context_type("a sequence of more than one item is not allowed as argument")
else:
break
else:
if item is None:
if not required:
return default
ord_arg = ordinal(index + 1)
self.missing_sequence("A not empty sequence required for %s argument" % ord_arg)
# Type promotion checking (see "function conversion rules" in XPath 2.0 language definition)
if cls is not None and not isinstance(item, cls):
if self.parser.compatibility_mode:
if issubclass(cls, string_base_type):
return self.string_value(item)
elif issubclass(cls, float) or issubclass(float, cls):
return self.number_value(item)
if self.parser.version > '1.0':
value = self.data_value(item)
if isinstance(value, cls):
return value
elif isinstance(value, UntypedAtomic):
try:
if issubclass(cls, string_base_type):
return str(value)
else:
return cls(value)
except (TypeError, ValueError):
pass
code = 'XPTY0004' if self.label == 'function' else 'FORG0006'
message = "the %s argument %r is not an instance of %r"
raise self.error(code, message % (ordinal(index + 1), item, cls))
return item
def atomization(self, context=None):
"""
Helper method for value atomization of a sequence.
Ref: https://www.w3.org/TR/xpath20/#id-atomization
:param context: the XPath context.
"""
for item in self.select(context):
value = self.data_value(item)
if value is None:
raise self.error('FOTY0012', "argument node does not have a typed value: {}".format(item))
else:
yield value
def get_atomized_operand(self, context=None):
"""
Get the atomized value for an XPath operator.
:param context: the XPath context.
:return: the atomized value of a single length sequence or `None` if the sequence is empty.
"""
selector = iter(self.atomization(context))
try:
value = next(selector)
except StopIteration:
return
else:
try:
next(selector)
except StopIteration:
return str(value) if isinstance(value, UntypedAtomic) else value
else:
self.wrong_context_type("atomized operand is a sequence of length greater than one")
def get_comparison_data(self, context):
"""
Get comparison data couples for the general comparison of sequences. Different sequences
maybe generated with an XPath 2.0 parser, depending on compatibility mode setting.
Ref: https://www.w3.org/TR/xpath20/#id-general-comparisons
:param context: the XPath dynamic context.
:returns: a list of data couples.
"""
if context is None:
operand1, operand2 = list(self[0].select()), list(self[1].select())
else:
operand1 = list(self[0].select(context.copy()))
operand2 = list(self[1].select(context.copy()))
if self.parser.compatibility_mode:
# Boolean comparison if one of the results is a single boolean value (1.)
try:
if isinstance(operand1[0], bool):
if len(operand1) == 1:
return [(operand1[0], self.boolean_value(operand2))]
if isinstance(operand2[0], bool):
if len(operand2) == 1:
return [(self.boolean_value(operand1), operand2[0])]
except IndexError:
return []
# Converts to float for lesser-greater operators (3.)
if self.symbol in ('<', '<=', '>', '>='):
return [
(float(self.data_value(value1)), float(self.data_value(value2)))
for value1 in operand1 for value2 in operand2
]
return [(self.data_value(value1), self.data_value(value2))
for value1 in operand1 for value2 in operand2]
def get_results(self, context):
"""
Returns formatted XPath results.
:param context: the XPath dynamic context.
:return: a list or a simple datatype when the result is a single simple type \
generated by a literal or function token.
"""
results = list(self.select(context))
if len(results) == 1:
res = results[0]
if isinstance(res, tuple) or is_etree_element(res) or is_document_node(res):
return results
elif self.symbol in ('text', 'node'):
return results
elif self.label in ('function', 'literal'):
return res
elif isinstance(res, bool): # Tests and comparisons
return res
else:
return results
else:
return results
def get_operands(self, context, cls=None):
"""
Returns the operands for a binary operator. Float arguments are converted
to decimal if the other argument is a `Decimal` instance.
:param context: the XPath dynamic context.
:param cls: if a type is provided performs a type checking on item.
:return: a couple of values representing the operands. If any operand \
is not available returns a `(None, None)` couple.
"""
arg1 = self.get_argument(context, cls=cls)
if arg1 is None:
return None, None
arg2 = self.get_argument(context, index=1, cls=cls)
if arg2 is None:
return None, None
if isinstance(arg1, Decimal) and isinstance(arg2, float):
return arg1, Decimal(arg2)
elif isinstance(arg2, Decimal) and isinstance(arg1, float):
return Decimal(arg1), arg2
return arg1, arg2
def adjust_datetime(self, context, cls):
"""
XSD datetime adjust function helper.
:param context: the XPath dynamic context.
:param cls: the XSD datetime subclass to use.
:return: an empty list if there is only one argument that is the empty sequence \
or the adjusted XSD datetime instance.
"""
if len(self) == 1:
item = self.get_argument(context, cls=cls)
if item is None:
return []
timezone = getattr(context, 'timezone', None)
else:
item = self.get_argument(context=None, cls=cls) # don't use implicit timezone
timezone = self.get_argument(context, 1, cls=DayTimeDuration)
if timezone is not None:
timezone = Timezone.fromduration(timezone)
if item.tzinfo is not None and timezone is not None:
item += timezone.offset - item.tzinfo.offset
item.tzinfo = timezone
elif item.tzinfo is None:
if timezone is not None:
item.tzinfo = timezone
elif timezone is None:
item.tzinfo = None
return item
def match_xsd_type(self, schema_item, name):
"""
Match a token with a schema type, checking the matching between the provided schema
item and name. If there is a match and the token is already related with another
schema type an exception is raised.
:param schema_item: an XPath item related with a schema instance.
:param name: a not empty string.
:returns: the matched XSD type or `None` if there isn't a match.
"""
if name[0] != '{' and self.parser.default_namespace:
name = '{%s}%s' % (self.parser.default_namespace, name)
if isinstance(schema_item, AttributeNode):
if not schema_item[1].is_matching(name):
return
try:
xsd_type = schema_item[1].type
except AttributeError:
try:
xsd_type = self.parser.schema.get_attribute(name).type
except AttributeError:
return
elif is_etree_element(schema_item):
if hasattr(schema_item, 'is_matching'):
if not schema_item.is_matching(name, self.parser.default_namespace):
return
elif schema_item.tag != name:
return
try:
xsd_type = schema_item.type
except AttributeError:
try:
xsd_type = self.parser.schema.get_element(name).type
except AttributeError:
return
else:
return
if self.xsd_type is None:
self.xsd_type = xsd_type
elif self.xsd_type is not xsd_type:
self.wrong_context_type("Multiple XSD type matching during static analysis")
return xsd_type
@contextlib.contextmanager
def use_locale(self, collation):
"""A context manager for setting a specific collation for a code block."""
locale.setlocale(locale.LC_ALL, '')
default_locale = locale.getlocale()
try:
locale.setlocale(locale.LC_ALL, collation)
except locale.Error:
raise self.error('FOCH0002', 'Unsupported collation %r' % collation)
else:
yield
finally:
locale.setlocale(locale.LC_ALL, default_locale)
###
# XPath data conversion base functions
def data_value(self, obj):
"""
The typed value, as computed by fn:data() on each item. Returns an instance of
UntypedAtomic.
"""
if obj is None:
return
elif not is_xpath_node(obj):
return obj
elif hasattr(obj, 'type'):
return self.schema_node_value(obj)
else:
return UntypedAtomic(self.string_value(obj))
def boolean_value(self, obj):
"""
The effective boolean value, as computed by fn:boolean().
"""
if isinstance(obj, list):
if not obj:
return False
elif isinstance(obj[0], tuple) or is_element_node(obj[0]):
return True
elif len(obj) == 1:
return bool(obj[0])
else:
raise self.error(
code='FORG0006',
message="Effective boolean value is not defined for a sequence of two or "
"more items not starting with an XPath node.",
)
elif isinstance(obj, tuple) or is_element_node(obj):
raise self.error('FORG0006', "Effective boolean value is not defined for {}.".format(obj))
return bool(obj)
def string_value(self, obj):
"""
The string value, as computed by fn:string().
"""
if obj is None:
return ''
elif is_element_node(obj):
return u''.join(elem_iter_strings(obj))
elif is_attribute_node(obj):
return obj[1]
elif is_text_node(obj):
return obj
elif is_document_node(obj):
return u''.join(e.text for e in obj.getroot().iter() if e.text is not None)
elif is_namespace_node(obj):
return obj[1]
elif is_comment_node(obj):
return obj.text
elif is_processing_instruction_node(obj):
return obj.text
elif is_schema_node(obj):
return str(self.schema_node_value(obj))
else:
return str(obj)
def number_value(self, obj):
"""
The numeric value, as computed by fn:number() on each item. Returns a float value.
"""
try:
return float(self.string_value(obj) if is_xpath_node(obj) else obj)
except (TypeError, ValueError):
return float('nan')
def schema_node_value(self, obj):
"""
Returns a sample typed value for the XSD schema node, valid in the value space
of the node. Used for schema-based dynamic evaluation of XPath expressions.
"""
try:
if obj.type.is_simple():
# In case of schema element or attribute use a the sample value
# of the primitive type
primitive_type = self.parser.schema.get_primitive_type(obj.type)
return XSD_BUILTIN_TYPES[primitive_type.local_name].value
elif obj.type.local_name == 'anyType':
return XSD_BUILTIN_TYPES['anyType'].value
except AttributeError:
raise self.wrong_type("the argument %r is not a node of an XSD schema" % obj)
###
# Error handling helpers
def error(self, code, message=None):
"""
Returns an XPath error instance related with a code. An XPath/XQuery/XSLT error code is an
alphanumeric token starting with four uppercase letters and ending with four digits.
:param code: the error code.
:param message: an optional custom additional message.
"""
return xpath_error(code, message, self, self.error_prefix)
# Shortcuts for XPath errors
def wrong_syntax(self, message=None):
if self.symbol == '::' and self.parser.token.symbol == '(name)':
self.missing_axis(message or "Axis '%s::' not found" % self.parser.token.value)
super(XPathToken, self).wrong_syntax(message)
def wrong_value(self, message=None):
raise self.error('FOCA0002', message)
def wrong_type(self, message=None):
raise self.error('FORG0006', message)
def missing_schema(self, message=None):
raise self.error('XPST0001', message)
def missing_context(self, message=None):
raise self.error('XPDY0002', message)
def wrong_context_type(self, message=None):
raise self.error('XPTY0004', message)
def missing_sequence(self, message=None):
raise self.error('XPST0005', message)
def missing_name(self, message=None):
raise self.error('XPST0008', message)
def missing_axis(self, message=None):
raise self.error('XPST0010', message)
def wrong_nargs(self, message=None):
raise self.error('XPST0017', message)
def wrong_step_result(self, message=None):
raise self.error('XPTY0018', message)
def wrong_intermediate_step_result(self, message=None):
raise self.error('XPTY0019', message)
def wrong_axis_argument(self, message=None):
raise self.error('XPTY0020', message)
def wrong_sequence_type(self, message=None):
raise self.error('XPDY0050', message)
def unknown_atomic_type(self, message=None):
raise self.error('XPST0051', message)
def wrong_target_type(self, message=None):
raise self.error('XPST0080', message)
def unknown_namespace(self, message=None):
raise self.error('XPST0081', message)