603 lines
18 KiB
Python
603 lines
18 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
|
|
# All rights reserved.
|
|
# This file is distributed under the terms of the MIT License.
|
|
# See the file 'LICENSE' in the root directory of the present
|
|
# distribution, or http://opensource.org/licenses/MIT.
|
|
#
|
|
# @author Davide Brunato <brunato@sissa.it>
|
|
#
|
|
"""
|
|
XPath 2.0 implementation - part 3 (XSD constructors and multi-role tokens)
|
|
"""
|
|
import decimal
|
|
import codecs
|
|
|
|
from .compat import unicode_type, urlparse, URLError, string_base_type
|
|
from .exceptions import ElementPathError, xpath_error
|
|
from .xpath_nodes import is_attribute_node
|
|
from .datatypes import DateTime10, Date10, Time, XPathGregorianDay, XPathGregorianMonth, \
|
|
XPathGregorianMonthDay, XPathGregorianYear, XPathGregorianYearMonth, UntypedAtomic, Duration, \
|
|
YearMonthDuration, DayTimeDuration, WHITESPACES_PATTERN, QNAME_PATTERN, NMTOKEN_PATTERN, NAME_PATTERN, \
|
|
NCNAME_PATTERN, HEX_BINARY_PATTERN, NOT_BASE64_BINARY_PATTERN, LANGUAGE_CODE_PATTERN, WRONG_ESCAPE_PATTERN
|
|
from .xpath2_functions import XPath2Parser
|
|
|
|
|
|
def collapse_white_spaces(s):
    """
    Replace every match of WHITESPACES_PATTERN in `s` with a single space
    and strip the leading/trailing whitespace from the result.
    """
    collapsed = WHITESPACES_PATTERN.sub(' ', s)
    return collapsed.strip()
|
|
|
|
|
|
# Module-level shortcuts to the XPath2Parser registration helpers.
# `constructor` and `method` are used below as decorators.
constructor = XPath2Parser.constructor
method = XPath2Parser.method
register = XPath2Parser.register
unregister = XPath2Parser.unregister
|
|
|
|
|
|
###
|
|
# Constructors for string-based XSD types
|
|
@constructor('normalizedString')
def cast(value):
    """
    Cast to xs:normalizedString.

    The XSD `whiteSpace` facet value 'replace' maps each tab (#x9), line feed
    (#xA) and carriage return (#xD) to a space (#x20).

    :param value: the value to convert, stringified with `str()`.
    :return: the string with tabs, line feeds and carriage returns replaced by spaces.
    """
    return str(value).replace('\t', ' ').replace('\n', ' ').replace('\r', ' ')
|
|
|
|
|
|
@constructor('token')
def cast(value):
    """Cast to xs:token: the value with its white spaces collapsed."""
    collapsed = collapse_white_spaces(value)
    return collapsed
|
|
|
|
|
|
@constructor('language')
def cast(value):
    """
    Cast to xs:language.

    :param value: the value to convert; white spaces are collapsed before matching.
    :return: the text matched by LANGUAGE_CODE_PATTERN.
    :raise: the error built by `xpath_error` with code FOCA0002 if there is no match.
    """
    normalized = collapse_white_spaces(value)
    found = LANGUAGE_CODE_PATTERN.match(normalized)
    if found is not None:
        return found.group()
    raise xpath_error('FOCA0002', "%r is not a language code" % value)
|
|
|
|
|
|
@constructor('NMTOKEN')
def cast(value):
    """
    Cast to xs:NMTOKEN.

    :param value: the value to convert; white spaces are collapsed before matching.
    :return: the text matched by NMTOKEN_PATTERN.
    :raise: the error built by `xpath_error` with code FOCA0002 if there is no match.
    """
    normalized = collapse_white_spaces(value)
    found = NMTOKEN_PATTERN.match(normalized)
    if found is not None:
        return found.group()
    raise xpath_error('FOCA0002', "%r is not an xs:NMTOKEN value" % value)
|
|
|
|
|
|
@constructor('Name')
def cast(value):
    """
    Cast to xs:Name.

    :param value: the value to convert; white spaces are collapsed before matching.
    :return: the text matched by NAME_PATTERN.
    :raise: the error built by `xpath_error` with code FOCA0002 if there is no match.
    """
    normalized = collapse_white_spaces(value)
    found = NAME_PATTERN.match(normalized)
    if found is not None:
        return found.group()
    raise xpath_error('FOCA0002', "%r is not an xs:Name value" % value)
|
|
|
|
|
|
@constructor('NCName')
@constructor('ID')
@constructor('IDREF')
@constructor('ENTITY')
def cast(value):
    """
    Shared cast for xs:NCName, xs:ID, xs:IDREF and xs:ENTITY.

    :param value: the value to convert; white spaces are collapsed before matching.
    :return: the text matched by NCNAME_PATTERN.
    :raise: the error built by `xpath_error` with code FOCA0002 if there is no match.
    """
    normalized = collapse_white_spaces(value)
    found = NCNAME_PATTERN.match(normalized)
    if found is not None:
        return found.group()
    raise xpath_error('FOCA0002', "invalid value %r for constructor" % value)
|
|
|
|
|
|
@constructor('anyURI')
def cast(value):
    """
    Cast to xs:anyURI.

    The collapsed value must be accepted by `urlparse`, contain at most one
    '#' character and have no match for WRONG_ESCAPE_PATTERN.

    :param value: the value to convert.
    :return: the value with white spaces collapsed.
    :raise: the error built by `xpath_error` with code FOCA0002 if a check fails.
    """
    uri = collapse_white_spaces(value)

    try:
        urlparse(uri)
    except URLError:
        raise xpath_error('FOCA0002', "%r is not an xs:anyURI value" % value)

    if uri.count('#') > 1:
        raise xpath_error('FOCA0002', "%r is not an xs:anyURI value (too many # characters)" % value)
    if WRONG_ESCAPE_PATTERN.search(uri):
        raise xpath_error('FOCA0002', "%r is not an xs:anyURI value (wrong escaping)" % value)
    return uri
|
|
|
|
|
|
###
|
|
# Constructors for numeric XSD types
|
|
@constructor('decimal')
def cast(value):
    """
    Cast to xs:decimal.

    :param value: the value passed to `decimal.Decimal`.
    :return: a `decimal.Decimal` instance.
    :raise: the error built by `xpath_error` with code FORG0001 if the conversion fails.
    """
    try:
        converted = decimal.Decimal(value)
    except (ValueError, decimal.DecimalException) as err:
        raise xpath_error('FORG0001', str(err))
    else:
        return converted
|
|
|
|
|
|
@constructor('double')
@constructor('float')
def cast(value):
    """
    Shared cast for xs:double and xs:float.

    :param value: the value passed to `float`.
    :return: a `float` instance.
    :raise: the error built by `xpath_error` with code FORG0001 on `ValueError`.
    """
    try:
        converted = float(value)
    except ValueError as err:
        raise xpath_error('FORG0001', str(err))
    else:
        return converted
|
|
|
|
|
|
def cast_to_integer(value, lower_bound=None, higher_bound=None):
    """
    XSD integer types constructor helper.

    :param value: the value to convert.
    :param lower_bound: if not `None` the result must be higher or equal than its value.
    :param higher_bound: if not `None` the result must be lesser than its value.
    :return: an `int` instance.
    :raise: the error built by `xpath_error` with code FORG0001 if the value is not \
    decodable to an integer or if the value is out of bounds.
    """
    if isinstance(value, string_base_type):
        try:
            # Parse integer literals exactly: going through float() first
            # silently rounds values whose magnitude exceeds 2**53.
            result = int(value)
        except ValueError:
            try:
                # Fallback for non-integer lexical forms (e.g. '1.5', '1e3').
                result = int(float(value))
            except (ValueError, OverflowError):
                # OverflowError is raised by int() for 'inf'/'-inf'.
                raise xpath_error('FORG0001', 'could not convert %r to integer' % value)
    else:
        try:
            result = int(value)
        except (ValueError, OverflowError) as err:
            # int() raises OverflowError for infinite floats/decimals.
            raise xpath_error('FORG0001', str(err))

    if lower_bound is not None and result < lower_bound:
        raise xpath_error('FORG0001', "value %d is too low" % result)
    elif higher_bound is not None and result >= higher_bound:
        raise xpath_error('FORG0001', "value %d is too high" % result)
    return result
|
|
|
|
|
|
@constructor('integer')
def cast(value):
    """Cast to xs:integer (no bounds applied)."""
    unbounded = cast_to_integer(value)
    return unbounded
|
|
|
|
|
|
@constructor('nonNegativeInteger')
def cast(value):
    """Cast to xs:nonNegativeInteger: the result must be >= 0."""
    return cast_to_integer(value, lower_bound=0)
|
|
|
|
|
|
@constructor('positiveInteger')
def cast(value):
    """Cast to xs:positiveInteger: the result must be >= 1."""
    return cast_to_integer(value, lower_bound=1)
|
|
|
|
|
|
@constructor('nonPositiveInteger')
def cast(value):
    """Cast to xs:nonPositiveInteger: the result must be < 1 (exclusive upper bound)."""
    return cast_to_integer(value, None, 1)
|
|
|
|
|
|
@constructor('negativeInteger')
def cast(value, context=None):
    """
    Cast to xs:negativeInteger: the result must be < 0 (exclusive upper bound).

    The `context` argument is accepted but not used by this cast.
    """
    return cast_to_integer(value, None, 0)
|
|
|
|
|
|
@constructor('long')
def cast(value):
    """
    Cast to xs:long, a 64-bit signed integer (-2**63 <= n <= 2**63 - 1).

    The upper bound passed to `cast_to_integer` is exclusive.
    """
    return cast_to_integer(value, -2**63, 2**63)
|
|
|
|
|
|
@constructor('int')
def cast(value):
    """
    Cast to xs:int, a 32-bit signed integer (-2**31 <= n <= 2**31 - 1).

    The upper bound passed to `cast_to_integer` is exclusive.
    """
    return cast_to_integer(value, -2**31, 2**31)
|
|
|
|
|
|
@constructor('short')
def cast(value):
    """Cast to xs:short: -2**15 <= result < 2**15."""
    return cast_to_integer(value, lower_bound=-2**15, higher_bound=2**15)
|
|
|
|
|
|
@constructor('byte')
def cast(value):
    """Cast to xs:byte: -2**7 <= result < 2**7."""
    return cast_to_integer(value, lower_bound=-2**7, higher_bound=2**7)
|
|
|
|
|
|
@constructor('unsignedLong')
def cast(value):
    """
    Cast to xs:unsignedLong, a 64-bit unsigned integer (0 <= n <= 2**64 - 1).

    The upper bound passed to `cast_to_integer` is exclusive.
    """
    return cast_to_integer(value, 0, 2**64)
|
|
|
|
|
|
@constructor('unsignedInt')
def cast(value):
    """
    Cast to xs:unsignedInt, a 32-bit unsigned integer (0 <= n <= 2**32 - 1).

    The upper bound passed to `cast_to_integer` is exclusive.
    """
    return cast_to_integer(value, 0, 2**32)
|
|
|
|
|
|
@constructor('unsignedShort')
def cast(value):
    """Cast to xs:unsignedShort: 0 <= result < 2**16."""
    return cast_to_integer(value, lower_bound=0, higher_bound=2**16)
|
|
|
|
|
|
@constructor('unsignedByte')
def cast(value):
    """Cast to xs:unsignedByte: 0 <= result < 2**8."""
    return cast_to_integer(value, lower_bound=0, higher_bound=2**8)
|
|
|
|
|
|
###
|
|
# Constructors for datetime XSD types
|
|
@constructor('date')
def cast(value, tz=None):
    """
    Cast to xs:date.

    :param value: a `Date10` instance (returned as is) or a value parsed \
    with `Date10.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    return value if isinstance(value, Date10) else Date10.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@constructor('gDay')
def cast(value, tz=None):
    """
    Cast to xs:gDay.

    :param value: an `XPathGregorianDay` instance (returned as is) or a value \
    parsed with `XPathGregorianDay.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    already_typed = isinstance(value, XPathGregorianDay)
    return value if already_typed else XPathGregorianDay.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@constructor('gMonth')
def cast(value, tz=None):
    """
    Cast to xs:gMonth.

    :param value: an `XPathGregorianMonth` instance (returned as is) or a value \
    parsed with `XPathGregorianMonth.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    already_typed = isinstance(value, XPathGregorianMonth)
    return value if already_typed else XPathGregorianMonth.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@constructor('gMonthDay')
def cast(value, tz=None):
    """
    Cast to xs:gMonthDay.

    :param value: an `XPathGregorianMonthDay` instance (returned as is) or a value \
    parsed with `XPathGregorianMonthDay.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    already_typed = isinstance(value, XPathGregorianMonthDay)
    return value if already_typed else XPathGregorianMonthDay.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@constructor('gYear')
def cast(value, tz=None):
    """
    Cast to xs:gYear.

    :param value: an `XPathGregorianYear` instance (returned as is) or a value \
    parsed with `XPathGregorianYear.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    already_typed = isinstance(value, XPathGregorianYear)
    return value if already_typed else XPathGregorianYear.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@constructor('gYearMonth')
def cast(value, tz=None):
    """
    Cast to xs:gYearMonth.

    :param value: an `XPathGregorianYearMonth` instance (returned as is) or a value \
    parsed with `XPathGregorianYearMonth.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    already_typed = isinstance(value, XPathGregorianYearMonth)
    return value if already_typed else XPathGregorianYearMonth.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@constructor('time')
def cast(value, tz=None):
    """
    Cast to xs:time.

    :param value: a `Time` instance (returned as is) or a value parsed \
    with `Time.fromstring`.
    :param tz: passed to `fromstring` as `tzinfo`.
    """
    return value if isinstance(value, Time) else Time.fromstring(value, tzinfo=tz)
|
|
|
|
|
|
@method('date')
@method('gDay')
@method('gMonth')
@method('gMonthDay')
@method('gYear')
@method('gYearMonth')
@method('time')
def evaluate(self, context=None):
    """
    Shared evaluation for the date/time constructors.

    Returns an empty list when the argument is `None`, otherwise the result
    of the token's `cast`, called with the context timezone (or `None` when
    there is no context). A `ValueError` is re-raised as error FOCA0002 and
    a `TypeError` as error FORG0006.
    """
    argument = self.get_argument(context)
    if argument is None:
        return []

    try:
        if context is None:
            return self.cast(argument, tz=None)
        return self.cast(argument, tz=context.timezone)
    except ValueError as err:
        raise self.error('FOCA0002', str(err))
    except TypeError as err:
        raise self.error('FORG0006', str(err))
|
|
|
|
|
|
###
|
|
# Constructors for time durations XSD types
|
|
@constructor('duration')
def cast(value):
    """Cast to xs:duration by parsing the value with `Duration.fromstring`."""
    parsed = Duration.fromstring(value)
    return parsed
|
|
|
|
|
|
@constructor('yearMonthDuration')
def cast(value):
    """Cast to xs:yearMonthDuration by parsing with `YearMonthDuration.fromstring`."""
    parsed = YearMonthDuration.fromstring(value)
    return parsed
|
|
|
|
|
|
@constructor('dayTimeDuration')
def cast(value):
    """Cast to xs:dayTimeDuration by parsing with `DayTimeDuration.fromstring`."""
    parsed = DayTimeDuration.fromstring(value)
    return parsed
|
|
|
|
|
|
###
|
|
# Constructors for binary XSD types
|
|
@constructor('base64Binary')
def cast(value, from_literal=False):
    """
    Cast to xs:base64Binary.

    :param value: an `UntypedAtomic`, a bytes or a unicode string instance.
    :param from_literal: if `True` a bytes value is also encoded from its \
    ASCII-encoded form, like a non-bytes string.
    :return: the base64 encoded data, or the bytes value unchanged when it \
    matches neither HEX_BINARY_PATTERN nor NOT_BASE64_BINARY_PATTERN.
    :raise: the error built by `xpath_error` with code FORG0006 for other types.
    """
    if isinstance(value, UntypedAtomic):
        return codecs.encode(unicode_type(value), 'base64')

    if not isinstance(value, (bytes, unicode_type)):
        raise xpath_error('FORG0006', 'the argument has an invalid type %r' % type(value))

    if not isinstance(value, bytes) or from_literal:
        return codecs.encode(value.encode('ascii'), 'base64')

    # From here on the value is a bytes instance not coming from a literal.
    if HEX_BINARY_PATTERN.search(value.decode('utf-8')):
        if str is not bytes:
            # Python 3 only: decode the hexadecimal digits before re-encoding.
            value = codecs.decode(value, 'hex')
        return codecs.encode(value, 'base64')

    if NOT_BASE64_BINARY_PATTERN.search(value.decode('utf-8')):
        return codecs.encode(value, 'base64')

    return value
|
|
|
|
|
|
@constructor('hexBinary')
def cast(value, from_literal=False):
    """
    Cast to xs:hexBinary.

    :param value: an `UntypedAtomic`, a bytes or a unicode string instance.
    :param from_literal: if `True` a bytes value is also encoded from its \
    ASCII-encoded form, like a non-bytes string.
    :return: the hex encoded data, or the value itself when it is a bytes \
    instance that matches HEX_BINARY_PATTERN.
    :raise: the error built by `xpath_error` with code FORG0006 for other types.
    """
    if isinstance(value, UntypedAtomic):
        return codecs.encode(unicode_type(value), 'hex')

    if not isinstance(value, (bytes, unicode_type)):
        raise xpath_error('FORG0006', 'the argument has an invalid type %r' % type(value))

    if not isinstance(value, bytes) or from_literal:
        return codecs.encode(value.encode('ascii'), 'hex')

    if HEX_BINARY_PATTERN.search(value.decode('utf-8')):
        if isinstance(value, bytes) or str is bytes:
            return value
        return codecs.encode(value.encode('ascii'), 'hex')

    # Try to interpret the data as base64; if decoding fails the raw
    # bytes are hex-encoded instead.
    try:
        value = codecs.decode(value, 'base64')
    except ValueError:
        return codecs.encode(value, 'hex')
    return codecs.encode(value, 'hex')
|
|
|
|
|
|
@method('base64Binary')
@method('hexBinary')
def evaluate(self, context=None):
    """
    Shared evaluation for the binary constructors.

    Returns an empty list when the argument is `None`, otherwise the result
    of the token's `cast`; the second argument of the cast tells whether the
    first operand token is labelled 'literal'. An `ElementPathError` without
    a token is bound to this token and re-raised; a `ValueError` becomes
    error FOCA0002 and a `TypeError` becomes error FORG0006.
    """
    argument = self.get_argument(context)
    if argument is None:
        return []

    try:
        return self.cast(argument, self[0].label == 'literal')
    except ElementPathError as err:
        if err.token is None:
            err.token = self
        raise
    except ValueError as err:
        raise self.error('FOCA0002', str(err))
    except TypeError as err:
        raise self.error('FORG0006', str(err))
|
|
|
|
|
|
###
|
|
# Multi role-tokens cases
|
|
#
|
|
|
|
# Case 1: In XPath 2.0 the 'attribute' keyword is used both for attribute:: axis and
|
|
# attribute() node type function.
|
|
#
|
|
# First the XPath1 token class has to be removed from the XPath2 symbol table. Then the
|
|
# symbol has to be registered usually with the same binding power (bp --> lbp, rbp), a
|
|
# multi-value label (using a tuple of values) and a custom pattern. Finally a custom nud
|
|
# or led method is required.
|
|
# Replace the existing 'attribute' token class with a multi-role one
# (labelled as function or axis).
unregister('attribute')
register(
    'attribute',
    lbp=90,
    rbp=90,
    label=('function', 'axis'),
    pattern=r'\battribute(?=\s*\:\:|\s*\(\:.*\:\)\s*\:\:|\s*\(|\s*\(\:.*\:\)\()'
)
|
|
|
|
|
|
@method('attribute')
def nud(self):
    """
    Parse the 'attribute' token either as an axis (when followed by '::')
    or as a function call with zero, one or two arguments. The token label
    is set to 'axis' or 'function' accordingly.
    """
    parser = self.parser
    if parser.next_token.symbol == '::':
        # attribute:: axis
        parser.advance('::')
        parser.next_token.expected(
            '(name)', '*', 'text', 'node', 'document-node', 'comment', 'processing-instruction',
            'attribute', 'schema-attribute', 'element', 'schema-element'
        )
        self[:] = (parser.expression(rbp=90),)
        self.label = 'axis'
        return self

    # attribute(...) function form
    parser.advance('(')
    if parser.next_token.symbol != ')':
        self[:] = (parser.expression(5),)
        if parser.next_token.symbol == ',':
            parser.advance(',')
            self[1:] = (parser.expression(5),)
    parser.advance(')')
    self.label = 'function'
    return self
|
|
|
|
|
|
@method('attribute')
def select(self, context=None):
    """
    Generator of the results for the 'attribute' token.

    Yields nothing without a context. As an axis, for each step of
    `context.iter_attributes()` it yields the selection of the operand.
    As a function, it yields `context.item[1]` for each iterated item
    accepted by `is_attribute_node` with the optional name argument.
    """
    if context is None:
        return

    if self.label == 'axis':
        for _ in context.iter_attributes():
            for selected in self[0].select(context):
                yield selected
        return

    name = self[0].evaluate(context) if self else None
    for attribute in context.iter_attributes():
        if is_attribute_node(attribute, name):
            yield context.item[1]
|
|
|
|
|
|
@method('attribute')
def evaluate(self, context=None):
    """
    Return `context.item[1]` if the context item is accepted by
    `is_attribute_node` with the optional name argument, `None` otherwise
    (also when there is no context).
    """
    if context is None:
        return None

    current = context.item
    name = self[0].evaluate(context) if self else None
    if is_attribute_node(current, name):
        return context.item[1]
    return None
|
|
|
|
|
|
# Case 2: In XPath 2.0 the 'boolean' keyword is used both for boolean() function and
|
|
# for boolean() constructor.
|
|
def cast_to_boolean(value, context=None):
    """
    Cast helper for the xs:boolean constructor.

    :param value: a bool (returned as is), a number (converted with `bool`), \
    an `UntypedAtomic` or a string.
    :param context: accepted but not used by this helper.
    :return: `True` for 'true'/'1' and `False` for 'false'/'0'.
    :raise: the error built by `xpath_error`, with code FORG0006 for an \
    invalid type or FOCA0002 for a string that is not a boolean value.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float, decimal.Decimal)):
        return bool(value)

    if isinstance(value, UntypedAtomic):
        value = string_base_type(value)
    elif not isinstance(value, string_base_type):
        raise xpath_error('FORG0006', 'the argument has an invalid type %r' % type(value))

    if value in ('true', '1'):
        return True
    if value in ('false', '0'):
        return False
    raise xpath_error('FOCA0002', "%r: not a boolean value" % value)
|
|
|
|
|
|
# Replace the existing 'boolean' token class with a multi-role one
# (labelled as function or constructor) that carries the cast helper.
unregister('boolean')
register(
    'boolean',
    lbp=90,
    rbp=90,
    label=('function', 'constructor'),
    pattern=r'\bboolean(?=\s*\(|\s*\(\:.*\:\)\()',
    cast=staticmethod(cast_to_boolean)
)
|
|
|
|
|
|
@method('boolean')
def nud(self):
    """
    Parse the 'boolean' token as a call with exactly one argument;
    `wrong_nargs` is invoked for an empty argument list or for a second
    argument. The token value is reset to `None`.
    """
    parser = self.parser
    parser.advance('(')
    if parser.next_token.symbol == ')':
        self.wrong_nargs('Too few arguments: expected at least 1 argument')

    self[0:] = (parser.expression(5),)
    if parser.next_token.symbol == ',':
        self.wrong_nargs('Too many arguments: expected at most 1 argument')

    parser.advance(')')
    self.value = None
    return self
|
|
|
|
|
|
@method('boolean')
def evaluate(self, context=None):
    """
    Evaluate the 'boolean' token.

    With the 'function' label it returns the boolean value of the operand
    results. Otherwise it acts as the xs:boolean constructor: an empty list
    for a `None` argument, else the result of the token's `cast`; an
    `ElementPathError` is bound to this token before being re-raised.
    """
    if self.label == 'function':
        results = self[0].get_results(context)
        return self.boolean_value(results)

    # xs:boolean constructor
    argument = self.get_argument(context)
    if argument is None:
        return []

    try:
        return self.cast(argument, context)
    except ElementPathError as err:
        err.token = self
        raise
|
|
|
|
|
|
# Case 3: In XPath 2.0 the 'string' keyword is used both for fn:string() and xs:string().
|
|
# Replace the existing 'string' token class with a multi-role one
# (labelled as function or constructor) whose cast is `str`.
unregister('string')
register(
    'string',
    lbp=90,
    rbp=90,
    label=('function', 'constructor'),
    pattern=r'\bstring(?=\s*\(|\s*\(\:.*\:\)\()',
    cast=staticmethod(lambda v, c=None: str(v))
)
|
|
|
|
|
|
@method('string')
def nud(self):
    """
    Parse the 'string' token as a call with one argument expression and
    reset the token value to `None`.
    """
    parser = self.parser
    parser.advance('(')
    self[0:] = (parser.expression(5),)
    parser.advance(')')
    self.value = None
    return self
|
|
|
|
|
|
@method('string')
def evaluate(self, context=None):
    """
    Evaluate the 'string' token.

    With the 'function' label it returns the string value of the argument.
    Otherwise (constructor role) it returns an empty list for a `None`
    argument, else `str()` of the argument.
    """
    if self.label == 'function':
        return self.string_value(self.get_argument(context))

    argument = self.get_argument(context)
    if argument is None:
        return []
    return str(argument)
|
|
|
|
|
|
# Case 4 and 5: In XPath 2.0 the XSD 'QName' and 'dateTime' types have special constructor functions so
|
|
# the 'QName' keyword is used both for fn:QName() and xs:QName(), the same for 'dateTime' keyword.
|
|
#
|
|
# In those cases the label at parse time is set by the nud method, in dependence of the number of args.
|
|
#
|
|
def cast_to_qname(value, namespaces=None):
    """
    Cast helper for the xs:QName constructor.

    :param value: a string that must match QNAME_PATTERN.
    :param namespaces: a container of known prefixes; a prefixed name is \
    accepted only if its prefix is in it.
    :return: the value unchanged.
    :raise: the error built by `xpath_error`, with code FORG0006 for a \
    non-string value, FOCA0002 for a non matching value or FONS0004 for \
    an unknown prefix.
    """
    if not isinstance(value, string_base_type):
        raise xpath_error('FORG0006', 'the argument has an invalid type %r' % type(value))

    parsed = QNAME_PATTERN.match(value)
    if parsed is None:
        raise xpath_error('FOCA0002', 'the argument must be an xs:QName')

    prefix = parsed.groupdict()['prefix'] or ''
    if prefix and not (namespaces and prefix in namespaces):
        raise xpath_error('FONS0004', 'No namespace found for prefix %r' % prefix)
    return value
|
|
|
|
|
|
def cast_to_datetime(value, tz=None):
    """
    Cast helper for the xs:dateTime constructor: parse the value with
    `DateTime10.fromstring`, passing `tz` as `tzinfo`.
    """
    parsed = DateTime10.fromstring(value, tzinfo=tz)
    return parsed
|
|
|
|
|
|
# Multi-role tokens (function or constructor) for 'QName' and 'dateTime',
# each one carrying its cast helper.
register(
    'QName',
    lbp=90,
    rbp=90,
    label=('function', 'constructor'),
    pattern=r'\bQName(?=\s*\(|\s*\(\:.*\:\)\()',
    cast=staticmethod(cast_to_qname)
)
register(
    'dateTime',
    lbp=90,
    rbp=90,
    label=('function', 'constructor'),
    pattern=r'\bdateTime(?=\s*\(|\s*\(\:.*\:\)\()',
    cast=staticmethod(cast_to_datetime)
)
|
|
|
|
|
|
@method('QName')
@method('dateTime')
def nud(self):
    """
    Parse the token as a call with one or two arguments. The label is set
    to 'function' when a second argument follows, to 'constructor'
    otherwise. The token value is reset to `None`.
    """
    parser = self.parser
    parser.advance('(')
    self[0:] = (parser.expression(5),)

    if parser.next_token.symbol == ',':
        # Two arguments: function role.
        self.label = 'function'
        parser.advance(',')
        self[1:] = (parser.expression(5),)
    else:
        # Single argument: constructor role.
        self.label = 'constructor'

    parser.advance(')')
    self.value = None
    return self
|
|
|
|
|
|
@method('QName')
def evaluate(self, context=None):
    """
    Evaluate the 'QName' token.

    Constructor role (one argument): returns an empty list for a `None`
    argument, else the result of the token's `cast` called with the parser
    namespaces; an `ElementPathError` without a token is bound to this token
    and re-raised.

    Function role (two arguments): the first argument is the namespace URI
    (`None` is treated as ''), the second one is the qualified name. With a
    non-empty URI the prefix is registered in the parser namespaces if not
    yet present, otherwise it must already be bound to the same URI.

    :return: the qualified name string, or the cast result for the constructor.
    :raise: error FORG0006 for arguments that are not strings, FOCA0002 for \
    an invalid name, a prefixed name with an empty URI or a prefix already \
    bound to a different namespace.
    """
    if self.label == 'constructor':
        item = self.get_argument(context)
        if item is None:
            return []
        try:
            return self.cast(item, self.parser.namespaces)
        except ElementPathError as err:
            if err.token is None:
                err.token = self
            raise
    else:
        uri = self.get_argument(context)
        if uri is None:
            uri = ''
        elif not isinstance(uri, string_base_type):
            raise self.error('FORG0006', '1st argument has an invalid type %r' % type(uri))

        qname = self[1].evaluate(context)
        if not isinstance(qname, string_base_type):
            raise self.error('FORG0006', '2nd argument has an invalid type %r' % type(qname))
        match = QNAME_PATTERN.match(qname)
        if match is None:
            raise self.error('FOCA0002', '2nd argument must be an xs:QName')

        pfx = match.groupdict()['prefix'] or ''
        if not uri:
            if pfx:
                raise self.error('FOCA0002', 'must be a local name when the parameter URI is empty')
        else:
            # Only the lookup is guarded, so a KeyError can come from nowhere else.
            try:
                bound_uri = self.parser.namespaces[pfx]
            except KeyError:
                # Unknown prefix: bind it to the provided URI.
                self.parser.namespaces[pfx] = uri
            else:
                if uri != bound_uri:
                    raise self.error('FOCA0002', 'prefix %r is already used for another namespace' % pfx)
        return qname
|
|
|
|
|
|
@method('dateTime')
def evaluate(self, context=None):
    """
    Evaluate the 'dateTime' token.

    Constructor role: returns an empty list for a `None` argument, else the
    result of the token's `cast` with the context timezone (`None` without a
    context); `ValueError` becomes error FOCA0002, `TypeError` FORG0006.

    Function role: combines a `Date10` and a `Time` argument in a
    `DateTime10`. Returns `None` if one of the arguments is `None` and
    raises error FORG0008 when both carry a different timezone.
    """
    if self.label == 'constructor':
        argument = self.get_argument(context)
        if argument is None:
            return []

        try:
            if context is None:
                return self.cast(argument, tz=None)
            return self.cast(argument, tz=context.timezone)
        except ValueError as err:
            raise self.error('FOCA0002', str(err))
        except TypeError as err:
            raise self.error('FORG0006', str(err))

    dt = self.get_argument(context, cls=Date10)
    tm = self.get_argument(context, 1, cls=Time)
    if dt is None or tm is None:
        return None

    # Select the timezone of the result from the two operands.
    if dt.tzinfo == tm.tzinfo or tm.tzinfo is None:
        tzinfo = dt.tzinfo
    elif dt.tzinfo is None:
        tzinfo = tm.tzinfo
    else:
        raise self.error('FORG0008')

    return DateTime10(
        dt.year, dt.month, dt.day, tm.hour, tm.minute, tm.second, tm.microsecond, tzinfo
    )
|
|
|
|
|
|
# XPath 2.0 definitions completed, build the tokenizer.
XPath2Parser.build_tokenizer()
|