1275 lines
40 KiB
Python
1275 lines
40 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
|
|
# All rights reserved.
|
|
# This file is distributed under the terms of the MIT License.
|
|
# See the file 'LICENSE' in the root directory of the present
|
|
# distribution, or http://opensource.org/licenses/MIT.
|
|
#
|
|
# @author Davide Brunato <brunato@sissa.it>
|
|
#
|
|
from __future__ import division
|
|
import math
|
|
import decimal
|
|
|
|
from .compat import PY3, string_base_type
|
|
from .exceptions import ElementPathSyntaxError, ElementPathNameError, MissingContextError
|
|
from .datatypes import UntypedAtomic, DayTimeDuration, YearMonthDuration, \
|
|
NumericTypeProxy, ArithmeticTypeProxy, XSD_BUILTIN_TYPES
|
|
from .xpath_context import XPathSchemaContext
|
|
from .tdop_parser import Parser, MultiLabel
|
|
from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \
|
|
XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed
|
|
from .xpath_token import XPathToken
|
|
from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \
|
|
is_xpath_node, is_element_node, is_document_node, is_attribute_node, \
|
|
is_text_node, is_comment_node, is_processing_instruction_node, node_name
|
|
|
|
# Character-class fragment (to be placed inside a regex '[...]') listing the
# characters that may start an XML name, with the colon left out so that the
# result can be used for NCNames. The fragments below are simply concatenated.
XML_NAME_CHARACTER = u"".join((
    u"A-Z_a-z",
    u"\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF",
    u"\u0370-\u037D\u037F-\u1FFF",
    u"\u200C\u200D\u2070-\u218F\u2C00-\u2FEF",
    u"\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD",
))

# Regex source for an NCName: one start character followed by any number of
# start characters, digits, '-', '.', the middle dot and combining marks.
XML_NCNAME_PATTERN = u"[{0}][-.0-9\u00B7\u0300-\u036F\u203F-\u2040{0}]*".format(XML_NAME_CHARACTER)
|
|
|
|
|
|
class XPath1Parser(Parser):
    """
    Parser class for XPath 1.0 expressions. An instance also plays the role
    of the XPath static context: it holds the prefix-to-URI namespace map and
    the in-scope variables.

    :param namespaces: A dictionary mapping namespace prefixes to URIs. It is \
    merged over a copy of `DEFAULT_NAMESPACES`.
    :param variables: A dictionary with the static context's in-scope variables.
    :param strict: If `False` the parser also accepts names in the \
    `{namespace}local` form, like the ElementPath library. Default is `True`.
    """
    token_base_class = XPathToken

    SYMBOLS = Parser.SYMBOLS | {
        # Axes
        'descendant-or-self', 'following-sibling', 'preceding-sibling',
        'ancestor-or-self', 'descendant', 'attribute', 'following',
        'namespace', 'preceding', 'ancestor', 'parent', 'child', 'self',

        # Operators
        'and', 'mod', 'div', 'or', '..', '//', '!=', '<=', '>=', '(', ')', '[', ']',
        ':', '.', '@', ',', '/', '|', '*', '-', '=', '+', '<', '>', '$', '::',

        # Node test functions
        'node', 'text', 'comment', 'processing-instruction',

        # Node set functions
        'last', 'position', 'count', 'id', 'name', 'local-name', 'namespace-uri',

        # String functions
        'string', 'concat', 'starts-with', 'contains',
        'substring-before', 'substring-after', 'substring',
        'string-length', 'normalize-space', 'translate',

        # Boolean functions
        'boolean', 'not', 'true', 'false', 'lang',

        # Number functions
        'number', 'sum', 'floor', 'ceiling', 'round',

        # Symbols for ElementPath extensions
        '{', '}'
    }

    # Default prefix-to-namespace associations of the class. Each instance
    # starts from a copy of this mapping and updates it with the *namespaces*
    # argument.
    DEFAULT_NAMESPACES = XPATH_1_DEFAULT_NAMESPACES

    # Always None here: keeps schema-binding checks uniform with XPath2Parser.
    schema = None

    def __init__(self, namespaces=None, variables=None, strict=True, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not used here.
        super(XPath1Parser, self).__init__()
        self.namespaces = self.DEFAULT_NAMESPACES.copy()
        if namespaces is not None:
            self.namespaces.update(namespaces)
        self.variables = dict([] if variables is None else variables)
        self.strict = strict

    @classmethod
    def build_tokenizer(cls, name_pattern=XML_NCNAME_PATTERN):
        """Build the tokenizer using the NCName pattern as default name pattern."""
        super(XPath1Parser, cls).build_tokenizer(name_pattern)

    @property
    def version(self):
        """The XPath version string."""
        return '1.0'

    @property
    def compatibility_mode(self):
        """XPath 1.0 compatibility mode (always `True` for this parser)."""
        return True

    @property
    def default_namespace(self):
        """
        The default namespace, that for XPath 1.0 is always `None` because the default
        namespace is ignored (see https://www.w3.org/TR/1999/REC-xpath-19991116/#node-tests).
        """
        return None

    @classmethod
    def axis(cls, symbol, bp=80):
        """Register a token class for a symbol that represents an XPath *axis*."""
        def nud_(self):
            parser = self.parser
            parser.advance('::')
            parser.next_token.expected(
                '(name)', '*', 'text', 'node', 'document-node', 'comment', 'processing-instruction',
                'attribute', 'schema-attribute', 'element', 'schema-element'
            )
            self[:] = (parser.expression(rbp=bp),)
            return self

        # The axis name is matched only when followed by '::', optionally
        # preceded by spaces or by a '(: ... :)' comment.
        axis_pattern_template = r'\b%s(?=\s*\:\:|\s*\(\:.*\:\)\s*\:\:)'
        try:
            axis_name = symbol.strip()
        except AttributeError:
            # Not a string: read the name from the object's 'symbol' attribute.
            axis_name = getattr(symbol, 'symbol')
        pattern = axis_pattern_template % axis_name
        return cls.register(symbol, pattern=pattern, label='axis', lbp=bp, rbp=bp, nud=nud_)

    @classmethod
    def function(cls, symbol, nargs=None, bp=90):
        """
        Register a token class for a symbol that represents an XPath *function*.

        :param symbol: the function name.
        :param nargs: `None` for a comma-separated list of one or more arguments, \
        `0` for no arguments, an integer for a fixed number of arguments or a \
        `(min, max)` couple for a variable number of arguments.
        :param bp: the binding power used for both left and right.
        """
        def nud_(self):
            parser = self.parser
            self.value = None
            parser.advance('(')

            if nargs is None:
                # Unbounded list: parse arguments until no more commas follow.
                del self[:]
                while True:
                    self.append(parser.expression(5))
                    if parser.next_token.symbol != ',':
                        break
                    parser.advance(',')
                parser.advance(')')
                return self

            if nargs == 0:
                parser.advance(')')
                return self

            if isinstance(nargs, (tuple, list)):
                min_args, max_args = nargs
            else:
                min_args = max_args = nargs

            # Mandatory arguments.
            count = 0
            while count < min_args:
                if parser.next_token.symbol == ')':
                    msg = 'Too few arguments: expected at least %s arguments' % min_args
                    # Drop the plural 's' when exactly one argument is required.
                    self.wrong_nargs(msg[:-1] if min_args == 1 else msg)

                self[count:] = (parser.expression(5),)
                count += 1
                if count < min_args:
                    parser.advance(',')

            # Optional arguments.
            while count < max_args:
                if parser.next_token.symbol == ',':
                    parser.advance(',')
                    self[count:] = (parser.expression(5),)
                elif count == 0 and parser.next_token.symbol != ')':
                    self[count:] = (parser.expression(5),)
                else:
                    break
                count += 1

            if parser.next_token.symbol == ',':
                msg = 'Too many arguments: expected at most %s arguments' % max_args
                self.wrong_nargs(msg[:-1] if max_args == 1 else msg)

            parser.advance(')')
            return self

        # The name is matched as a function only when followed by '('.
        pattern = r'\b%s(?=\s*\(|\s*\(\:.*\:\)\()' % symbol
        return cls.register(symbol, pattern=pattern, label='function', lbp=bp, rbp=bp, nud=nud_)

    def next_is_path_step_token(self):
        """Tell if the next token can start a path step (an axis or one of a fixed set of symbols)."""
        token = self.next_token
        if token.label == 'axis':
            return True
        return token.symbol in {
            '(integer)', '(string)', '(float)', '(decimal)', '(name)', 'node', 'text', '*',
            '@', '..', '.', '(', '{'
        }

    def parse(self, source):
        """
        Parse *source* and return the root token. The tree is also evaluated once
        without a context (static evaluation); a `MissingContextError` raised by
        that evaluation is ignored.
        """
        root_token = super(XPath1Parser, self).parse(source)
        try:
            root_token.evaluate()  # Static context evaluation
        except MissingContextError:
            pass
        return root_token
|
|
|
|
|
|
##
|
|
# XPath1 definitions
|
|
# Module-level shortcuts to the registration helpers of the XPath 1.0 parser
# class: the definitions that follow in this module use them directly.
register = XPath1Parser.register
literal = XPath1Parser.literal
nullary = XPath1Parser.nullary
prefix = XPath1Parser.prefix
infix = XPath1Parser.infix
postfix = XPath1Parser.postfix
method = XPath1Parser.method
function = XPath1Parser.function
axis = XPath1Parser.axis


###
# Simple symbols
for _symbol in (',', ')', ']', '::', '}'):
    register(_symbol)


###
# Literals
for _symbol in ('(string)', '(float)', '(decimal)', '(integer)'):
    literal(_symbol)
literal('(name)', bp=10)
del _symbol  # don't leave the loop variable in the module namespace
|
|
|
|
|
|
@method('(name)')
def evaluate(self, context=None):
    """
    Match the name against the context item. Returns the matched item (or its
    value/decoded value) or `None` when there is no context or no match.
    """
    if context is None:
        return None

    default_ns = self.parser.default_namespace
    qname = self.value
    if qname[0] != '{' and default_ns:
        # Unqualified name: map it into the default namespace.
        qname = u'{%s}%s' % (default_ns, qname)

    item = context.item
    if isinstance(context, XPathSchemaContext):
        xsd_type = self.match_xsd_type(item, qname)
        if xsd_type is None:
            return None
        if isinstance(context.item, AttributeNode):
            primitive_type = self.parser.schema.get_primitive_type(xsd_type)
            return XSD_BUILTIN_TYPES[primitive_type.local_name].value
        return context.item

    if self.xsd_type is None:
        # Untyped match
        if is_attribute_node(item, qname):
            return item[1]
        if is_element_node(item, qname):
            return item
        return None

    # Typed match: values are decoded with the associated XSD type.
    try:
        if is_attribute_node(item, qname):
            return self.xsd_type.decode(item[1])
        elif is_element_node(item, qname):
            if self.xsd_type.is_simple():
                return self.xsd_type.decode(item)
            return item
    except (TypeError, ValueError):
        self.wrong_context_type("Type %r is not appropriate for the context" % (type(context.item)))
|
|
|
|
|
|
@method('(name)')
def select(self, context=None):
    """
    Generator that yields the items produced by `context.iter_children_or_self()`
    that match the name. Yields nothing without a context.
    """
    if context is None:
        return

    default_ns = self.parser.default_namespace
    qname = self.value
    if qname[0] != '{' and default_ns:
        # Unqualified name: map it into the default namespace.
        qname = u'{%s}%s' % (default_ns, qname)

    if isinstance(context, XPathSchemaContext):
        # Schema context: yield sample values for attributes, nodes otherwise.
        for candidate in context.iter_children_or_self():
            xsd_type = self.match_xsd_type(candidate, qname)
            if xsd_type is None:
                continue
            if isinstance(context.item, AttributeNode):
                primitive_type = self.parser.schema.get_primitive_type(xsd_type)
                yield XSD_BUILTIN_TYPES[primitive_type.local_name].value
            else:
                yield context.item

    elif self.xsd_type is None:
        # Untyped selection
        for candidate in context.iter_children_or_self():
            if is_attribute_node(candidate, qname):
                yield candidate[1]
            elif is_element_node(candidate, qname):
                yield candidate

    else:
        # Typed selection: values are decoded with the associated XSD type.
        for candidate in context.iter_children_or_self():
            try:
                if is_attribute_node(candidate, qname):
                    yield self.xsd_type.decode(candidate[1])
                elif is_element_node(candidate, qname):
                    if self.xsd_type.is_simple():
                        yield self.xsd_type.decode(candidate)
                    else:
                        yield candidate
            except (TypeError, ValueError):
                self.wrong_sequence_type("Type %r does not match sequence type of %r" % (self.xsd_type, candidate))
|
|
|
|
|
|
###
|
|
# Namespace prefix reference
|
|
@method(':', bp=95)
def led(self, left):
    """
    Parse a prefixed name: *left* is the prefix token and the right operand is
    parsed after checks on the token that follows the colon.
    """
    parser = self.parser
    if parser.version == '1.0':
        left.expected('(name)')
    else:
        left.expected('(name)', '*')

    next_token = parser.next_token
    if left.symbol == '(name)':
        try:
            namespace = parser.namespaces[left.value]
        except KeyError as err:
            raise self.error('FONS0004', 'No namespace found for prefix %s' % str(err))

        if next_token.symbol not in ('(name)', '*') and next_token.label not in ('function', 'constructor'):
            next_token.wrong_syntax()
        elif namespace == XPATH_FUNCTIONS_NAMESPACE:
            # Only functions are allowed in the XPath functions namespace.
            if next_token.label != 'function':
                next_token.wrong_syntax("An XPath function is expected.")
            elif isinstance(next_token.label, MultiLabel):
                next_token.label = 'function'
        elif namespace == XSD_NAMESPACE:
            # In the XSD namespace a name, a wildcard or a constructor is allowed.
            if next_token.symbol not in ('(name)', '*') and next_token.label != 'constructor':
                next_token.wrong_syntax("An XSD element or a constructor function is expected.")
            elif isinstance(next_token.label, MultiLabel):
                next_token.label = 'constructor'

    elif left.symbol == '*' and next_token.symbol != '(name)':
        next_token.wrong_syntax()

    if parser.is_spaced():
        self.wrong_syntax("a QName cannot contains spaces before or after ':'")

    right = parser.expression(90)
    self[:] = (left, right)
    return self
|
|
|
|
|
|
@method(':')
def evaluate(self, context=None):
    """
    Evaluate the right operand of a prefixed name after checking that it is
    compatible with the namespace of the prefix. Returns `None` for a
    wildcard prefix.
    """
    prefix_token, local_token = self[0], self[1]
    if prefix_token.value == '*':
        return None

    try:
        namespace = self.parser.namespaces[self[0].value]
    except KeyError as err:
        raise self.error('FONS0004', 'No namespace found for prefix %s' % str(err))

    if namespace == XPATH_FUNCTIONS_NAMESPACE and local_token.label != 'function':
        local_token.wrong_value("Must be a function")
    elif namespace == XSD_NAMESPACE and local_token.symbol not in ('(name)', '*') \
            and local_token.label != 'constructor':
        local_token.wrong_value("An XSD element or a constructor function is expected.")
    return local_token.evaluate(context)
|
|
|
|
|
|
@method(':')
def select(self, context=None):
    """
    Generator for prefixed names. For a function or a constructor yields its
    result (item by item if it is a list), otherwise yields the attribute
    values and elements that match the expanded name.
    """
    if self[1].label in ('function', 'constructor'):
        result = self[1].evaluate(context)
        if isinstance(result, list):
            for entry in result:
                yield entry
        else:
            yield result
        return

    if self[0].value == '*':
        expanded = '*:%s' % self[1].value
    else:
        try:
            namespace = self.parser.namespaces[self[0].value]
        except KeyError as err:
            raise self.error('FONS0004', 'No namespace found for prefix %s' % str(err))
        else:
            expanded = '{%s}%s' % (namespace, self[1].value)

    if context is None:
        return
    for item in context.iter_children_or_self():
        if is_attribute_node(item, expanded):
            yield item[1]
        elif is_element_node(item, expanded):
            yield item
|
|
|
|
|
|
###
|
|
# Namespace URI as in ElementPath
|
|
@method('{', bp=95)
def nud(self):
    """
    Parse a name in the `{namespace}local` form (ElementPath extension). The
    braces are rejected when the parser is in strict mode.
    """
    parser = self.parser
    if parser.strict:
        self.unexpected()

    # The URI is the value of the next token plus the raw text up to '}'.
    namespace = parser.next_token.value + parser.raw_advance('}')
    parser.advance()

    next_token = parser.next_token
    if next_token.symbol not in ('(name)', '*') and next_token.label != 'function':
        next_token.wrong_syntax()
    elif parser.next_token.label != 'function' and namespace == XPATH_FUNCTIONS_NAMESPACE:
        parser.next_token.wrong_syntax()

    # First child is a string token with the namespace, the second the name.
    namespace_token = parser.symbol_table['(string)'](parser, namespace)
    self[:] = (namespace_token, parser.expression(90))
    return self
|
|
|
|
|
|
@method('{')
def evaluate(self, context=None):
    """Return the function result or the name in the `{namespace}local` form."""
    target = self[1]
    if target.label == 'function':
        return target.evaluate(context)
    return '{%s}%s' % (self[0].value, target.value)
|
|
|
|
|
|
@method('{')
def select(self, context=None):
    """
    Generator for `{namespace}local` names. For a function yields its result
    (item by item if it is a list), otherwise yields the attribute values and
    elements that match the expanded name.
    """
    if self[1].label == 'function':
        result = self[1].evaluate(context)
        if isinstance(result, list):
            for entry in result:
                yield entry
        else:
            yield result
        return

    if context is None:
        return
    expanded = '{%s}%s' % (self[0].value, self[1].value)
    for item in context.iter_children_or_self():
        if is_attribute_node(item, expanded):
            yield item[1]
        elif is_element_node(item, expanded):
            yield item
|
|
|
|
|
|
###
|
|
# Variables
|
|
@method('$', bp=90)
def nud(self):
    """Parse a variable reference: '$' must be followed by a simple name."""
    parser = self.parser
    parser.next_token.expected('(name)')
    self[:] = (parser.expression(rbp=90),)
    name_token = self[0]
    if name_token.value.startswith('{'):
        name_token.wrong_value("Variable reference requires a simple reference name")
    return self
|
|
|
|
|
|
@method('$')
def evaluate(self, context=None):
    """
    Return the value of the variable, looking first in the parser variables and
    then in the context ones. Returns `None` without a context or with a schema
    context, otherwise an unknown variable raises `ElementPathNameError`.
    """
    varname = self[0].value
    static_variables = self.parser.variables
    if varname in static_variables:
        return static_variables[varname]
    if context is None:
        return None
    if varname in context.variables:
        return context.variables[varname]
    if isinstance(context, XPathSchemaContext):
        return None
    raise ElementPathNameError('unknown variable', token=self)
|
|
|
|
|
|
###
|
|
# Nullary operators (use only the context)
|
|
@method(nullary('*'))
def select(self, context=None):
    """
    Generator for '*'. With operands it is the product operator and yields the
    evaluated value, otherwise it is a wildcard that requires a context.
    """
    if self:
        # Product operator
        product = self.evaluate(context)
        if context is not None:
            context.item = product
        yield product
        return

    if context is None:
        self.missing_context()
        return

    # Wildcard literal
    for item in context.iter_children_or_self():
        if not context.is_principal_node_kind():
            continue
        if is_attribute_node(item):
            yield item[1]
        else:
            yield item
|
|
|
|
|
|
@method(nullary('.'))
def select(self, context=None):
    """
    Generator for the context item: yields the item if set, otherwise the
    root when it is a document node. A context is required.
    """
    if context is None:
        self.missing_context()
        return

    current = context.item
    if current is not None:
        yield current
    elif is_document_node(context.root):
        yield context.root
|
|
|
|
|
|
@method(nullary('..'))
def select(self, context=None):
    """
    Generator for the parent shortcut: yields the parent of the context item
    if the parent map has one and it is an element node. A context is required.
    """
    if context is None:
        self.missing_context()
        return

    try:
        parent = context.parent_map[context.item]
    except KeyError:
        # The item has no registered parent: nothing to yield.
        return

    if is_element_node(parent):
        context.item = parent
        yield parent
|
|
|
|
|
|
###
|
|
# Logical Operators
|
|
@method(infix('or', bp=20))
def evaluate(self, context=None):
    """Logical 'or': the right operand is evaluated only if the left one is false."""
    if self[0].evaluate(context):
        return True
    return bool(self[1].evaluate(context))
|
|
|
|
|
|
@method(infix('and', bp=25))
def evaluate(self, context=None):
    """Logical 'and': the right operand is evaluated only if the left one is true."""
    if not self[0].evaluate(context):
        return False
    return bool(self[1].evaluate(context))
|
|
|
|
|
|
@method(infix('=', bp=30))
def evaluate(self, context=None):
    """True if at least one couple of comparison data has equal members."""
    for left, right in self.get_comparison_data(context):
        if left == right:
            return True
    return False
|
|
|
|
|
|
@method(infix('!=', bp=30))
def evaluate(self, context=None):
    """True if at least one couple of comparison data has different members."""
    for left, right in self.get_comparison_data(context):
        if left != right:
            return True
    return False
|
|
|
|
|
|
@method(infix('<', bp=30))
def evaluate(self, context=None):
    """True if for at least one couple of comparison data the first is less than the second."""
    for left, right in self.get_comparison_data(context):
        if left < right:
            return True
    return False
|
|
|
|
|
|
@method(infix('>', bp=30))
def evaluate(self, context=None):
    """True if for at least one couple of comparison data the first is greater than the second."""
    for left, right in self.get_comparison_data(context):
        if left > right:
            return True
    return False
|
|
|
|
|
|
@method(infix('<=', bp=30))
def evaluate(self, context=None):
    """True if for at least one couple of comparison data the first is less or equal than the second."""
    for left, right in self.get_comparison_data(context):
        if left <= right:
            return True
    return False
|
|
|
|
|
|
@method(infix('>=', bp=30))
def evaluate(self, context=None):
    """True if for at least one couple of comparison data the first is greater or equal than the second."""
    for left, right in self.get_comparison_data(context):
        if left >= right:
            return True
    return False
|
|
|
|
|
|
###
|
|
# Numerical operators
|
|
# Register the unary forms of '+' and '-'. The evaluation of both the unary
# and the binary forms is done by the 'evaluate' methods bound to the same
# symbols, that check the number of operands.
prefix('+')
prefix('-', bp=90)
|
|
|
|
|
|
@method(infix('+', bp=40))
def evaluate(self, context=None):
    """
    Unary plus (one operand) or addition (two operands). Returns `None` if the
    token has no operands or if the (first) operand is `None`.
    """
    if not self:
        return None

    if len(self) == 1:
        operand = self.get_argument(context, cls=NumericTypeProxy)
        if operand is None:
            return None
        try:
            return +operand
        except TypeError:
            raise self.wrong_type("numeric value is required: %r" % operand)

    left, right = self.get_operands(context, cls=ArithmeticTypeProxy)
    if left is None:
        return None
    try:
        return left + right
    except TypeError as err:
        raise self.wrong_type(str(err))
|
|
|
|
|
|
@method(infix('-', bp=40))
def evaluate(self, context=None):
    """
    Unary minus (one operand) or subtraction. Returns `None` if the (first)
    operand is `None`.
    """
    if len(self) == 1:
        operand = self.get_argument(context, cls=NumericTypeProxy)
        if operand is None:
            return None
        try:
            return -operand
        except TypeError:
            raise self.wrong_type("numeric value is required: %r" % operand)

    left, right = self.get_operands(context, cls=ArithmeticTypeProxy)
    if left is None:
        return None
    try:
        return left - right
    except TypeError as err:
        raise self.wrong_type(str(err))
|
|
|
|
|
|
@method(infix('*', bp=45))
def evaluate(self, context=None):
    """
    Multiplication. Returns `None` if the token has no operands (wildcard
    usage) or if the first operand is `None`.
    """
    if not self:
        return None

    left, right = self.get_operands(context, cls=ArithmeticTypeProxy)
    if left is None:
        return None
    try:
        return left * right
    except TypeError as err:
        raise self.wrong_type(str(err))
|
|
|
|
|
|
@method(infix('div', bp=45))
def evaluate(self, context=None):
    """
    Division. A division by zero does not raise: the result is NaN for a zero
    dividend, otherwise an infinity with the sign of the dividend. Returns
    `None` if the dividend is `None`.
    """
    dividend, divisor = self.get_operands(context, cls=ArithmeticTypeProxy)
    if dividend is None:
        return None

    if divisor != 0:
        try:
            return dividend / divisor
        except TypeError as err:
            raise self.wrong_type(str(err))

    # Division by zero
    if dividend == 0:
        return float('nan')
    if dividend > 0:
        return float('inf')
    return float('-inf')
|
|
|
|
|
|
@method(infix('mod', bp=45))
def evaluate(self, context=None):
    """Modulo of two numeric operands. Returns `None` if the first operand is `None`."""
    left, right = self.get_operands(context, cls=NumericTypeProxy)
    if left is None:
        return None
    try:
        return left % right
    except TypeError as err:
        raise self.wrong_type(str(err))
|
|
|
|
|
|
###
|
|
# Union expressions
|
|
@method(infix('|', bp=50))
def select(self, context=None):
    """
    Union of the two operands: yields the items of `context.iter()` that are
    selected by at least one operand, in the order given by that iteration.
    """
    if context is None:
        return

    # Each operand is evaluated on its own copy of the context.
    selected = set()
    for index in (0, 1):
        selected.update(self[index].select(context.copy()))

    for item in context.iter():
        if item in selected:
            yield item
|
|
|
|
|
|
###
|
|
# Path expressions
|
|
@method('//', bp=80)
@method('/', bp=80)
def nud(self):
    """
    Parse a path operator in prefix position. A lone '/' at the end of the
    expression is accepted and has no operands.
    """
    parser = self.parser
    next_token = parser.next_token
    if self.symbol == '/' and next_token.symbol == '(end)':
        return self
    if not parser.next_is_path_step_token():
        next_token.wrong_syntax()
    self[:] = (parser.expression(80),)
    return self
|
|
|
|
|
|
@method('//', bp=80)
@method('/', bp=80)
def led(self, left):
    """Parse a path operator in infix position: a path step must follow."""
    parser = self.parser
    if not parser.next_is_path_step_token():
        parser.next_token.wrong_syntax()
    right = parser.expression(80)
    self[:] = (left, right)
    return self
|
|
|
|
|
|
@method('/')
def select(self, context=None):
    """
    Child path expression. Selects child:: axis as default (when bind to '*' or '(name)').
    Without operands yields the root if it's a document node; with one operand
    the selection starts with the context item unset; with two operands the
    right one is applied to each result of the left one, yielding each node
    only once.
    """
    if context is None:
        return

    if not self:
        if is_document_node(context.root):
            yield context.root
        return

    if len(self) == 1:
        context.item = None
        for result in self[0].select(context):
            yield result
        return

    seen = set()
    left_results = list(self[0].select(context))
    context.size = len(left_results)
    for position, item in enumerate(left_results):
        context.position = position
        context.item = item
        if not is_element_node(context.item):
            self.wrong_type("left operand must returns element nodes: {}".format(context.item))

        for result in self[1].select(context):
            if not (is_etree_element(result) or isinstance(result, tuple)):
                # Not a node: yielded without filtering duplicates.
                yield result
            elif result not in seen:
                yield result
                seen.add(result)
|
|
|
|
|
|
@method('/')
def evaluate(self, context=None):
    """
    General evaluation method for path operators: returns `None` without a
    context or for an empty selection, the data value of the item for a
    selection of a single item, and reports a wrong context type for a
    selection of more items.
    """
    if context is None:
        return None

    selector = iter(self.select(context))
    try:
        first = next(selector)
    except StopIteration:
        return None

    try:
        next(selector)
    except StopIteration:
        # Exactly one item selected.
        return self.data_value(first)
    else:
        self.wrong_context_type("atomized operand is a sequence of length greater than one")
|
|
|
|
|
|
@method('//')
def select(self, context=None):
    """
    Descendant path expression: applies the last operand for each step of
    the descendant-or-self iteration of the context, that starts from the
    current item (one operand) or from each element selected by the left
    operand (two operands).
    """
    if context is None:
        return

    if len(self) == 1:
        for _ in context.iter_descendants(axis='descendant-or-self'):
            for result in self[0].select(context):
                yield result
        return

    for start in self[0].select(context):
        if not is_element_node(start):
            self.wrong_type("left operand must returns element nodes: %r" % start)
        for _ in context.iter_descendants(item=start, axis='descendant-or-self'):
            for result in self[1].select(context):
                yield result
|
|
|
|
|
|
###
|
|
# Predicate filters
|
|
@method('[', bp=75)
def led(self, left):
    """Parse a predicate: a non-empty expression enclosed in square brackets."""
    parser = self.parser
    parser.next_token.unexpected(']')
    predicate = parser.expression()
    self[:] = (left, predicate)
    parser.advance(']')
    return self
|
|
|
|
|
|
@method('[')
def select(self, context=None):
    """
    Filter the results of the left operand with the predicate. A predicate
    that gives a single number (not a boolean) selects by position
    (1-based), otherwise its boolean value is used.
    """
    if context is None:
        return

    left_results = list(self[0].select(context))
    context.size = len(left_results)
    for position, item in enumerate(left_results):
        context.position = position
        context.item = item
        predicate = list(self[1].select(context.copy()))

        is_positional = (
            len(predicate) == 1 and
            not isinstance(predicate[0], bool) and
            isinstance(predicate[0], (int, float))
        )
        if is_positional:
            # context.position is 0-based, XPath positions are 1-based.
            if context.position == predicate[0] - 1:
                yield context.item
        elif self.boolean_value(predicate):
            yield context.item
|
|
|
|
|
|
###
|
|
# Parenthesized expressions
|
|
@method('(', bp=100)
def nud(self):
    """Parse a parenthesized expression, that cannot be empty."""
    parser = self.parser
    parser.next_token.unexpected(')')
    self[:] = (parser.expression(),)
    parser.advance(')')
    # The token is kept in the tree: its evaluate/select methods just
    # delegate to the inner expression.
    return self
|
|
|
|
|
|
@method('(')
def evaluate(self, context=None):
    """Evaluate the inner expression."""
    inner = self[0]
    return inner.evaluate(context)
|
|
|
|
|
|
@method('(')
def select(self, context=None):
    """Return the selector of the inner expression (not a generator function itself)."""
    inner = self[0]
    return inner.select(context)
|
|
|
|
|
|
###
|
|
# Forward Axes
|
|
@method(axis('self'))
def select(self, context=None):
    """Apply the node test for each step of `context.iter_self()`."""
    if context is None:
        return
    node_test = self[0]
    for _ in context.iter_self():
        for result in node_test.select(context):
            yield result
|
|
|
|
|
|
@method(axis('child'))
def select(self, context=None):
    """Apply the node test for each step of the child axis iteration."""
    if context is None:
        return
    node_test = self[0]
    for _ in context.iter_children_or_self(child_axis=True):
        for result in node_test.select(context):
            yield result
|
|
|
|
|
|
@method(axis('descendant'))
def select(self, context=None):
    """
    Apply the node test for each step of the descendants iteration, skipping
    the steps where the context item is the starting item.
    """
    if context is None:
        return
    start = context.item
    for _ in context.iter_descendants(axis=self.symbol):
        if context.item is start:
            continue
        for result in self[0].select(context):
            yield result
|
|
|
|
|
|
@method(axis('descendant-or-self'))
def select(self, context=None):
    """Apply the node test for each step of the descendants iteration."""
    if context is None:
        return
    node_test = self[0]
    for _ in context.iter_descendants(axis=self.symbol):
        for result in node_test.select(context):
            yield result
|
|
|
|
|
|
@method(axis('following-sibling'))
def select(self, context=None):
    """
    Apply the node test to the children of the parent that come after the
    context item. Selects nothing if the context item is not an element node.
    """
    if context is None or not is_element_node(context.item):
        return

    start = context.item
    for parent in context.iter_parent(axis=self.symbol):
        found = False
        for child in context.iter_children_or_self(parent, child_axis=True):
            if found:
                for result in self[0].select(context):
                    yield result
            elif start is child:
                # Siblings after this one are the following siblings.
                found = True
|
|
|
|
|
|
@method(axis('following'))
def select(self, context=None):
    """
    Apply the node test to the items of the root's descendants iteration that
    come after the context item, excluding the ones collected from the
    descendants iteration of the context item itself.
    """
    if context is None:
        return

    excluded = set(context.iter_descendants(axis=self.symbol))
    start = context.item
    found = False
    for elem in context.iter_descendants(item=context.root, axis=self.symbol):
        if not found:
            if start is elem:
                found = True
            continue
        if elem not in excluded:
            for result in self[0].select(context):
                yield result
|
|
|
|
|
|
@method('@', bp=80)
def nud(self):
    """Parse an abbreviated attribute step: a name, a wildcard or a prefixed name must follow."""
    self[:] = (self.parser.expression(rbp=80),)
    if self[0].symbol in ('*', '(name)', ':'):
        return self
    raise ElementPathSyntaxError("invalid attribute specification for XPath.")
|
|
|
|
|
|
@method('@')
@method(axis('attribute'))
def select(self, context=None):
    """Apply the node test for each step of the attributes iteration. A context is required."""
    if context is None:
        self.missing_context()

    node_test = self[0]
    for _ in context.iter_attributes():
        for result in node_test.select(context):
            yield result
|
|
|
|
|
|
@method(axis('namespace'))
def select(self, context=None):
    """
    Yield a namespace node for each namespace of the parser and, if the
    element has an 'nsmap' attribute, for each entry of that map with a
    prefix not defined in the parser. Requires an element as context item.
    """
    if context is None or not is_element_node(context.item):
        return

    elem = context.item
    known = self.parser.namespaces

    for prefix_, uri in known.items():
        context.item = NamespaceNode(prefix_, uri)
        yield context.item

    if hasattr(elem, 'nsmap'):
        # Maybe an lxml's Element: add the namespaces that the parser doesn't have.
        for prefix_, uri in elem.nsmap.items():
            if prefix_ in known:
                continue
            context.item = NamespaceNode(prefix_, uri)
            yield context.item
|
|
|
|
|
|
###
|
|
# Reverse Axes
|
|
@method(axis('parent'))
def select(self, context=None):
    """Apply the node test for each step of the parent iteration."""
    if context is None:
        return
    node_test = self[0]
    for _ in context.iter_parent(axis=self.symbol):
        for result in node_test.select(context):
            yield result
|
|
|
|
|
|
@method(axis('ancestor'))
def select(self, context=None):
    """
    Apply the node test for each step of the ancestors iteration and yield
    the collected results in reversed order.
    """
    if context is None:
        return

    # Collect everything first: the results are yielded in reversed order.
    collected = []
    for _ in context.iter_ancestors(axis=self.symbol):
        for item in self[0].select(context):
            collected.append(item)

    for result in reversed(collected):
        context.item = result
        yield result
|
|
|
|
|
|
@method(axis('ancestor-or-self'))
def select(self, context=None):
    """
    Yield the items of the ancestors iteration in reversed order, followed
    by the starting context item.
    """
    # NOTE(review): unlike the 'ancestor' axis, the node test (self[0]) is
    # not applied here -- confirm if this is intended.
    if context is None:
        return

    start = context.item
    ancestors = list(context.iter_ancestors(axis=self.symbol))
    for elem in reversed(ancestors):
        context.item = elem
        yield elem
    yield start
|
|
|
|
|
|
@method(axis('preceding-sibling'))
def select(self, context=None):
    """
    Apply the node test to the children of the parent that come before the
    context item. Selects nothing if the context item is not an element node.
    """
    if context is None or not is_element_node(context.item):
        return

    start = context.item
    for parent in context.iter_parent(axis=self.symbol):
        for child in parent:
            if child is start:
                # Reached the starting item: the remaining ones are not preceding.
                break
            context.item = child
            for result in self[0].select(context):
                yield result
|
|
|
|
|
|
@method(axis('preceding'))
def select(self, context=None):
    """
    Yield the elements of `context.root.iter()` that come before the context
    item, excluding the ones collected from the ancestors iteration. Selects
    nothing if the context item is not an element node.
    """
    # NOTE(review): the node test (self[0]) is not applied here -- confirm
    # if this is intended.
    if context is None or not is_element_node(context.item):
        return

    start = context.item
    excluded = set(context.iter_ancestors(axis=self.symbol))
    for candidate in context.root.iter():
        if candidate is start:
            break
        if candidate in excluded:
            continue
        context.item = candidate
        yield candidate
|
|
|
|
|
|
###
|
|
# Node types
|
|
@method(function('node', nargs=0))
def select(self, context=None):
    """
    Kind test node(): yields the XPath nodes of the children-or-self
    iteration, yielding the root when the iterated item is `None`.
    """
    if context is None:
        return
    for item in context.iter_children_or_self():
        if item is None:
            yield context.root
        elif is_xpath_node(item):
            yield item
|
|
|
|
|
|
@method(function('processing-instruction', nargs=(0, 1)))
def evaluate(self, context=None):
    """Return the context item if it's a processing instruction node, `None` otherwise."""
    if not context:
        return None
    if is_processing_instruction_node(context.item):
        return context.item
    return None
|
|
|
|
|
|
@method(function('comment', nargs=0))
def evaluate(self, context=None):
    """Return the context item if it's a comment node, `None` otherwise."""
    if not context:
        return None
    if is_comment_node(context.item):
        return context.item
    return None
|
|
|
|
|
|
@method(function('text', nargs=0))
def select(self, context=None):
    """
    Kind test text(): yields the text nodes of the children-or-self
    iteration, yielding the root when the iterated item is `None`.
    """
    if context is None:
        return
    for item in context.iter_children_or_self():
        if item is None:
            yield context.root
        elif is_text_node(item):
            yield item
|
|
|
|
|
|
###
|
|
# Node set functions
|
|
@method(function('last', nargs=0))
def evaluate(self, context=None):
    """Return the context size, or 0 without a context."""
    if context is None:
        return 0
    return context.size
|
|
|
|
|
|
@method(function('position', nargs=0))
def evaluate(self, context=None):
    """Return the 1-based context position, or 0 without a context."""
    if context is None:
        return 0
    # The position stored in the context is 0-based.
    return context.position + 1
|
|
|
|
|
|
@method(function('count', nargs=1))
def evaluate(self, context=None):
    """
    Count the items of the argument's evaluation: the length for a list,
    0 for `None` and 1 for any other value.
    """
    results = self[0].evaluate(context)
    if isinstance(results, list):
        return len(results)
    return 0 if results is None else 1
|
|
|
|
|
|
@method(function('id', nargs=1))
def select(self, context=None):
    """
    Yield the elements, in the subtree of the context item, whose xml:id
    attribute is equal to the evaluated argument. Selects nothing if the
    context item is not an element node.
    """
    if context is None:
        return

    wanted = self[0].evaluate(context)
    start = context.item
    if not is_element_node(start):
        return
    for elem in start.iter():
        if elem.get(XML_ID) == wanted:
            yield elem
|
|
|
|
|
|
@method(function('name', nargs=(0, 1)))
@method(function('local-name', nargs=(0, 1)))
@method(function('namespace-uri', nargs=(0, 1)))
def evaluate(self, context=None):
    """
    Shared implementation of name(), local-name() and namespace-uri(), applied
    to the argument or to the context item if the argument is omitted.
    Returns an empty string if the node has no name.
    """
    name = node_name(self.get_argument(context, default_to_context=True))
    if name is None:
        return ''

    symbol = self.symbol
    if symbol == 'name':
        return qname_to_prefixed(name, self.parser.namespaces)

    if not name or name[0] != '{':
        # Name without a namespace part.
        if symbol == 'local-name':
            return name
        return ''

    # Name in the '{namespace}local' form.
    if symbol == 'local-name':
        return name.split('}')[1]
    if symbol == 'namespace-uri':
        return name[1:].split('}')[0]
|
|
|
|
|
|
###
|
|
# String functions
|
|
@method(function('string', nargs=(0, 1)))
def evaluate(self, context=None):
    """
    XPath string() function: returns the string value of the argument.

    The XPath 1.0 signature is `string(object?)`: the argument is optional and
    when it's omitted the function is applied to the context node, so the
    function is registered with zero or one argument and falls back to the
    context item, like the name() function family of this module does.
    """
    return self.string_value(self.get_argument(context, default_to_context=True))
|
|
|
|
|
|
@method(function('contains', nargs=2))
def evaluate(self, context=None):
    """True if the first argument contains the second (missing values default to '')."""
    haystack = self.get_argument(context, default='', cls=string_base_type)
    needle = self.get_argument(context, index=1, default='', cls=string_base_type)
    return needle in haystack
|
|
|
|
|
|
@method(function('concat'))
def evaluate(self, context=None):
    """Return the concatenation of the string values of all the arguments."""
    parts = [
        self.string_value(self.get_argument(context, index=index))
        for index in range(len(self))
    ]
    return ''.join(parts)
|
|
|
|
|
|
@method(function('string-length', nargs=(0, 1)))
def evaluate(self, context=None):
    """
    XPath string-length() function: returns the number of characters of the
    argument.

    The XPath 1.0 signature is `string-length(string?)`: the body already
    defaults to the context item when the argument is omitted, so the function
    is registered with zero or one argument to make that form parseable.
    """
    return len(self.get_argument(context, default_to_context=True, default='', cls=string_base_type))
|
|
|
|
|
|
@method(function('normalize-space', nargs=(0, 1)))
def evaluate(self, context=None):
    """
    XPath normalize-space() function: strips leading and trailing whitespace
    and replaces each internal sequence of whitespace with a single space.

    The XPath 1.0 signature is `normalize-space(string?)`: the body already
    defaults to the context item when the argument is omitted, so the function
    is registered with zero or one argument to make that form parseable.
    """
    if self.parser.version == '1.0':
        # XPath 1.0: the argument is converted to its string value.
        arg = self.string_value(self.get_argument(context, default_to_context=True, default=''))
    else:
        arg = self.get_argument(context, default_to_context=True, default='', cls=string_base_type)
    return ' '.join(arg.strip().split())
|
|
|
|
|
|
@method(function('starts-with', nargs=2))
def evaluate(self, context=None):
    """
    XPath starts-with() function: tests whether the first string argument
    begins with the second one. A missing argument value defaults to ''.
    """
    text = self.get_argument(context, default='', cls=string_base_type)
    prefix = self.get_argument(context, index=1, default='', cls=string_base_type)
    return text.startswith(prefix)
|
|
|
|
|
|
@method(function('translate', nargs=3))
def evaluate(self, context=None):
    """
    XPath translate() function: returns the first argument with each
    character found in the second argument replaced by the character at the
    same position of the third argument.

    Following the XPath specification:

      * characters of the second argument without a counterpart in the
        third argument are removed from the result;
      * if a character occurs more than once in the second argument, the
        first occurrence determines the replacement;
      * if the third argument is longer than the second one, the excess
        characters are ignored.

    The mapping is applied character by character on text strings, so the
    same code path is used for Python 2 and Python 3 and non-ASCII
    characters are never processed as separate UTF-8 bytes.
    """
    arg = self.get_argument(context, default='', cls=string_base_type)
    map_string = self.get_argument(context, index=1, default='', cls=string_base_type)
    trans_string = self.get_argument(context, index=2, default='', cls=string_base_type)
    if not arg:
        return ''

    replacements = {}
    for position, char in enumerate(map_string):
        if char in replacements:
            continue  # the first occurrence wins
        # Unpaired map characters are deleted, i.e. replaced by ''.
        replacements[char] = trans_string[position] if position < len(trans_string) else ''

    return ''.join(replacements.get(char, char) for char in arg)
|
|
|
|
|
|
def _xpath_round(value):
    """Round *value* to the closest integer, with halves rounded towards +INF like XPath round()."""
    number = float(value)
    lower = math.floor(number)
    return int(lower) + (1 if number - lower >= 0.5 else 0)


@method(function('substring', nargs=(2, 3)))
def evaluate(self, context=None):
    """
    XPath substring() function: returns the part of the first argument that
    starts at the (1-based) position given by the second argument and, when
    a third argument is provided, spans that many characters.

    The numeric arguments are rounded like the XPath round() function
    (halves towards positive infinity): the builtin round() is not used
    because on Python 3 it rounds halves to the nearest even integer, e.g.
    round(2.5) == 2. A wrong-type error is reported through
    self.wrong_type() if the second or the third argument is not numeric.
    """
    item = self.get_argument(context, default='', cls=string_base_type)
    start = self.get_argument(context, index=1)
    try:
        if math.isnan(start):
            return ''
        elif math.isinf(start):
            # +INF is beyond every position. -INF precedes every position,
            # so the two-arguments form selects the whole string, while with
            # a length the end position (-INF + length) is never reached.
            if start < 0 and len(self) == 2 and item is not None:
                return item
            return ''
    except TypeError:
        self.wrong_type("the second argument must be xs:numeric")
    else:
        start = _xpath_round(start) - 1  # to 0-based index

    if len(self) == 2:
        return '' if item is None else item[max(start, 0):]

    length = self.get_argument(context, index=2)
    try:
        if math.isnan(length) or length <= 0:
            return ''
    except TypeError:
        self.wrong_type("the third argument must be xs:numeric")

    if item is None:
        return ''
    elif math.isinf(length):
        return item[max(start, 0):]

    stop = start + _xpath_round(length)
    return item[max(start, 0):max(stop, 0)]
|
|
|
|
|
|
@method(function('substring-before', nargs=2))
@method(function('substring-after', nargs=2))
def evaluate(self, context=None):
    """
    Shared evaluator for substring-before() and substring-after(): returns
    the part of the first argument that precedes/follows the first
    occurrence of the second argument.

    An empty string is returned when the first argument is None or when it
    does not contain the second argument. A wrong-type error is reported
    through self.wrong_type() if an argument is not a string.
    """
    arg1 = self.get_argument(context, default='', cls=string_base_type)
    arg2 = self.get_argument(context, index=1, default='', cls=string_base_type)
    if arg1 is None:
        return ''

    index = 0
    try:
        index = arg1.find(arg2)
    except AttributeError:
        self.wrong_type("the first argument must be a string")
    except TypeError:
        self.wrong_type("the second argument must be a string")

    if index < 0:
        # Not found: -1 must not be used as a slice index, otherwise
        # a wrong portion of the first argument would be returned.
        return ''

    if self.symbol == 'substring-before':
        return arg1[:index]
    else:
        return arg1[index + len(arg2):]
|
|
|
|
|
|
###
|
|
# Boolean functions
|
|
@method(function('boolean', nargs=1))
def evaluate(self, context=None):
    """XPath boolean() function: returns the boolean value of the argument's results."""
    results = self[0].get_results(context)
    return self.boolean_value(results)
|
|
|
|
|
|
@method(function('not', nargs=1))
def evaluate(self, context=None):
    """XPath not() function: returns the negated boolean value of the argument's results."""
    results = self[0].get_results(context)
    return not self.boolean_value(results)
|
|
|
|
|
|
@method(function('true', nargs=0))
def evaluate(self, context=None):
    """XPath true() function: always returns True, the context is not used."""
    return True
|
|
|
|
|
|
@method(function('false', nargs=0))
def evaluate(self, context=None):
    """XPath false() function: always returns False, the context is not used."""
    return False
|
|
|
|
|
|
@method(function('lang', nargs=1))
def evaluate(self, context=None):
    """
    XPath lang() function: tests the language of the context element against
    the argument.

    The language is read from the xml:lang attribute of the context element
    or, if missing, from the first ancestor yielded by
    context.iter_ancestor() that carries one. As required by the XPath
    specification the comparison ignores case and succeeds when the
    attribute value is equal to the argument or is the argument followed by
    a suffix starting with '-' (so 'en' matches 'en', 'en-US' and
    'en-GB-oed', and 'en-US' matches 'en-US').

    Returns None without a context, False if the context item is not an
    element or if no xml:lang attribute is found.
    """
    if context is None:
        return
    elif not is_element_node(context.item):
        return False

    try:
        lang = context.item.attrib[XML_LANG]
    except KeyError:
        for elem in context.iter_ancestor():
            if XML_LANG in elem.attrib:
                lang = elem.attrib[XML_LANG]
                break
        else:
            return False

    # Compare the whole tag and not only its first subtag: unpacking the
    # result of split('-') fails for tags with more than one hyphen.
    lang = lang.strip().lower()
    test_lang = self[0].evaluate().lower()
    return lang == test_lang or lang.startswith(test_lang + '-')
|
|
|
|
|
|
###
|
|
# Number functions
|
|
@method(function('number', nargs=(0, 1)))
def evaluate(self, context=None):
    """
    XPath number() function: converts the argument, or the context item if
    the argument is omitted, to a float. For XPath nodes the string value
    is converted. NaN is returned when the conversion fails.
    """
    operand = self.get_argument(context, default_to_context=True)
    try:
        if is_xpath_node(operand):
            return float(self.string_value(operand))
        return float(operand)
    except (TypeError, ValueError):
        return float('nan')
|
|
|
|
|
|
@method(function('sum', nargs=(1, 2)))
def evaluate(self, context=None):
    """
    XPath sum() function: returns the sum of the items selected by the
    first argument.

    UntypedAtomic items are converted with number_value() before summing.
    For an empty selection the result is 0 for the one-argument form,
    otherwise the value of the second argument ([] if that is None). NaN is
    returned if any value is a float NaN. When the values cannot be summed
    as numbers, NaN is returned for parser version '1.0', otherwise the
    error FORG0006 is raised.
    """
    values = []
    for item in self[0].select(context):
        if isinstance(item, UntypedAtomic):
            item = self.number_value(item)
        values.append(item)

    if not values:
        if len(self) == 1:
            return 0
        zero = self.get_argument(context, index=1)
        return [] if zero is None else zero

    for value in values:
        if isinstance(value, float) and math.isnan(value):
            return float('nan')

    # Duration values are summed as they are, without a numeric conversion.
    if any(isinstance(value, DayTimeDuration) for value in values):
        return sum(values)
    if all(isinstance(value, YearMonthDuration) for value in values):
        return sum(values)

    try:
        return sum(self.number_value(value) for value in values)
    except TypeError:
        if self.parser.version == '1.0':
            return float('nan')
        raise self.error('FORG0006')
|
|
|
|
|
|
@method(function('ceiling', nargs=1))
@method(function('floor', nargs=1))
def evaluate(self, context=None):
    """
    Shared evaluator for the ceiling() and floor() functions.

    A None argument gives NaN for parser version '1.0' and [] otherwise.
    XPath nodes (and any argument in compatibility mode) are converted with
    number_value(). Float NaN and infinities are returned unchanged. A
    wrong-type error is reported through self.wrong_type() when the value
    cannot be rounded.
    """
    arg = self.get_argument(context)
    if arg is None:
        if self.parser.version == '1.0':
            return float('nan')
        return []

    if is_xpath_node(arg) or self.parser.compatibility_mode:
        arg = self.number_value(arg)

    if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)):
        return arg

    rounding = math.floor if self.symbol == 'floor' else math.ceil
    try:
        return rounding(arg)
    except TypeError as err:
        self.wrong_type(str(err))
|
|
|
|
|
|
@method(function('round', nargs=1))
def evaluate(self, context=None):
    """
    XPath round() function: returns, as a float, the integer closest to the
    argument, with halves rounded towards positive infinity (2.5 -> 3.0,
    -2.5 -> -2.0, -1.5 -> -1.0).

    A None argument gives NaN for parser version '1.0' and [] otherwise.
    XPath nodes (and any argument in compatibility mode) are converted with
    number_value(). Float NaN and infinities are returned unchanged. Errors
    are reported through self.wrong_type() and self.wrong_value() when the
    argument cannot be converted to a decimal number.
    """
    arg = self.get_argument(context)
    if arg is None:
        return float('nan') if self.parser.version == '1.0' else []
    elif is_xpath_node(arg) or self.parser.compatibility_mode:
        arg = self.number_value(arg)

    if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)):
        return arg

    try:
        number = decimal.Decimal(arg)
        # Rounding halves towards +INF means half-up for positive numbers
        # and half-down (towards zero) for the others. The builtin round()
        # is avoided because on Python 3 it rounds halves to even, e.g.
        # round(Decimal('-1.5')) == -2. Unlike quantize(), the method
        # to_integral_value() is not limited by the context precision.
        rounding = decimal.ROUND_HALF_UP if number > 0 else decimal.ROUND_HALF_DOWN
        return float(number.to_integral_value(rounding=rounding))
    except TypeError as err:
        self.wrong_type(str(err))
    except decimal.DecimalException as err:
        self.wrong_value(str(err))
|
|
|
|
|
|
# Register the '(end)' symbol (presumably the end-of-expression marker
# of the TDOP parser -- confirm against tdop_parser).
register('(end)')
|
|
# Build the tokenizer of the parser class, after the symbol registrations
# above (presumably it is derived from the registered symbols -- confirm).
XPath1Parser.build_tokenizer()
|