# -*- coding: utf-8 -*- # # Copyright (c), 2018-2019, SISSA (International School for Advanced Studies). # All rights reserved. # This file is distributed under the terms of the MIT License. # See the file 'LICENSE' in the root directory of the present # distribution, or http://opensource.org/licenses/MIT. # # @author Davide Brunato # from __future__ import division import math import decimal from .compat import PY3, string_base_type from .exceptions import ElementPathSyntaxError, ElementPathNameError, MissingContextError from .datatypes import UntypedAtomic, DayTimeDuration, YearMonthDuration, \ NumericTypeProxy, ArithmeticTypeProxy, XSD_BUILTIN_TYPES from .xpath_context import XPathSchemaContext from .tdop_parser import Parser, MultiLabel from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \ XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed from .xpath_token import XPathToken from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \ is_xpath_node, is_element_node, is_document_node, is_attribute_node, \ is_text_node, is_comment_node, is_processing_instruction_node, node_name XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF" u"\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD") XML_NCNAME_PATTERN = u"[{0}][-.0-9\u00B7\u0300-\u036F\u203F-\u2040{0}]*".format(XML_NAME_CHARACTER) class XPath1Parser(Parser): """ XPath 1.0 expression parser class. A parser instance represents also the XPath static context. With *variables* you can pass a dictionary with the static context's in-scope variables. Provide a *namespaces* dictionary argument for mapping namespace prefixes to URI inside expressions. If *strict* is set to `False` the parser enables also the parsing of QNames, like the ElementPath library. :param namespaces: A dictionary with mapping from namespace prefixes into URIs. :param variables: A dictionary with the static context's in-scope variables. :param strict: If strict mode is `False` the parser enables parsing of QNames, \ like the ElementPath library. Default is `True`. """ token_base_class = XPathToken SYMBOLS = Parser.SYMBOLS | { # Axes 'descendant-or-self', 'following-sibling', 'preceding-sibling', 'ancestor-or-self', 'descendant', 'attribute', 'following', 'namespace', 'preceding', 'ancestor', 'parent', 'child', 'self', # Operators 'and', 'mod', 'div', 'or', '..', '//', '!=', '<=', '>=', '(', ')', '[', ']', ':', '.', '@', ',', '/', '|', '*', '-', '=', '+', '<', '>', '$', '::', # Node test functions 'node', 'text', 'comment', 'processing-instruction', # Node set functions 'last', 'position', 'count', 'id', 'name', 'local-name', 'namespace-uri', # String functions 'string', 'concat', 'starts-with', 'contains', 'substring-before', 'substring-after', 'substring', 'string-length', 'normalize-space', 'translate', # Boolean functions 'boolean', 'not', 'true', 'false', 'lang', # Number functions 'number', 'sum', 'floor', 'ceiling', 'round', # Symbols for ElementPath extensions '{', '}' } DEFAULT_NAMESPACES = XPATH_1_DEFAULT_NAMESPACES """ The default prefix-to-namespace associations of the XPath class. Those namespaces are updated in the instance with the ones passed with the *namespaces* argument. """ schema = None # To simplify the schema bind checks in compatibility with XPath2Parser def __init__(self, namespaces=None, variables=None, strict=True, *args, **kwargs): super(XPath1Parser, self).__init__() self.namespaces = self.DEFAULT_NAMESPACES.copy() if namespaces is not None: self.namespaces.update(namespaces) self.variables = dict(variables if variables is not None else []) self.strict = strict @classmethod def build_tokenizer(cls, name_pattern=XML_NCNAME_PATTERN): super(XPath1Parser, cls).build_tokenizer(name_pattern) @property def version(self): """The XPath version string.""" return '1.0' @property def compatibility_mode(self): """XPath 1.0 compatibility mode.""" return True @property def default_namespace(self): """ The default namespace. For XPath 1.0 this value is always `None` because the default namespace is ignored (see https://www.w3.org/TR/1999/REC-xpath-19991116/#node-tests). """ return @classmethod def axis(cls, symbol, bp=80): """Register a token for a symbol that represents an XPath *axis*.""" def nud_(self): self.parser.advance('::') self.parser.next_token.expected( '(name)', '*', 'text', 'node', 'document-node', 'comment', 'processing-instruction', 'attribute', 'schema-attribute', 'element', 'schema-element' ) self[:] = self.parser.expression(rbp=bp), return self axis_pattern_template = r'\b%s(?=\s*\:\:|\s*\(\:.*\:\)\s*\:\:)' try: pattern = axis_pattern_template % symbol.strip() except AttributeError: pattern = axis_pattern_template % getattr(symbol, 'symbol') return cls.register(symbol, pattern=pattern, label='axis', lbp=bp, rbp=bp, nud=nud_) @classmethod def function(cls, symbol, nargs=None, bp=90): """Registers a token class for a symbol that represents an XPath *function*.""" def nud_(self): self.value = None self.parser.advance('(') if nargs is None: del self[:] while True: self.append(self.parser.expression(5)) if self.parser.next_token.symbol != ',': break self.parser.advance(',') self.parser.advance(')') return self elif nargs == 0: self.parser.advance(')') return self elif isinstance(nargs, (tuple, list)): min_args, max_args = nargs else: min_args = max_args = nargs k = 0 while k < min_args: if self.parser.next_token.symbol == ')': msg = 'Too few arguments: expected at least %s arguments' % min_args self.wrong_nargs(msg[:-1] if min_args == 1 else msg) self[k:] = self.parser.expression(5), k += 1 if k < min_args: self.parser.advance(',') while k < max_args: if self.parser.next_token.symbol == ',': self.parser.advance(',') self[k:] = self.parser.expression(5), elif k == 0 and self.parser.next_token.symbol != ')': self[k:] = self.parser.expression(5), else: break k += 1 if self.parser.next_token.symbol == ',': msg = 'Too many arguments: expected at most %s arguments' % max_args self.wrong_nargs(msg[:-1] if max_args == 1 else msg) self.parser.advance(')') return self pattern = r'\b%s(?=\s*\(|\s*\(\:.*\:\)\()' % symbol return cls.register(symbol, pattern=pattern, label='function', lbp=bp, rbp=bp, nud=nud_) def next_is_path_step_token(self): return self.next_token.label == 'axis' or self.next_token.symbol in { '(integer)', '(string)', '(float)', '(decimal)', '(name)', 'node', 'text', '*', '@', '..', '.', '(', '{' } def parse(self, source): root_token = super(XPath1Parser, self).parse(source) try: root_token.evaluate() # Static context evaluation except MissingContextError: pass return root_token ## # XPath1 definitions register = XPath1Parser.register literal = XPath1Parser.literal nullary = XPath1Parser.nullary prefix = XPath1Parser.prefix infix = XPath1Parser.infix postfix = XPath1Parser.postfix method = XPath1Parser.method function = XPath1Parser.function axis = XPath1Parser.axis ### # Simple symbols register(',') register(')') register(']') register('::') register('}') ### # Literals literal('(string)') literal('(float)') literal('(decimal)') literal('(integer)') literal('(name)', bp=10) @method('(name)') def evaluate(self, context=None): if context is None: return name = self.value if name[0] != '{' and self.parser.default_namespace: name = u'{%s}%s' % (self.parser.default_namespace, name) if isinstance(context, XPathSchemaContext): xsd_type = self.match_xsd_type(context.item, name) if xsd_type is not None: if isinstance(context.item, AttributeNode): primitive_type = self.parser.schema.get_primitive_type(xsd_type) return XSD_BUILTIN_TYPES[primitive_type.local_name].value else: return context.item elif self.xsd_type is None: if is_attribute_node(context.item, name): return context.item[1] elif is_element_node(context.item, name): return context.item else: try: if is_attribute_node(context.item, name): return self.xsd_type.decode(context.item[1]) elif is_element_node(context.item, name): if self.xsd_type.is_simple(): return self.xsd_type.decode(context.item) else: return context.item except (TypeError, ValueError): self.wrong_context_type("Type %r is not appropriate for the context" % (type(context.item))) @method('(name)') def select(self, context=None): if context is None: return name = self.value if name[0] != '{' and self.parser.default_namespace: name = u'{%s}%s' % (self.parser.default_namespace, name) if isinstance(context, XPathSchemaContext): for item in context.iter_children_or_self(): xsd_type = self.match_xsd_type(item, name) if xsd_type is not None: if isinstance(context.item, AttributeNode): primitive_type = self.parser.schema.get_primitive_type(xsd_type) yield XSD_BUILTIN_TYPES[primitive_type.local_name].value else: yield context.item elif self.xsd_type is None: # Untyped selection for item in context.iter_children_or_self(): if is_attribute_node(item, name): yield item[1] elif is_element_node(item, name): yield item else: # Typed selection for item in context.iter_children_or_self(): try: if is_attribute_node(item, name): yield self.xsd_type.decode(item[1]) elif is_element_node(item, name): if self.xsd_type.is_simple(): yield self.xsd_type.decode(item) else: yield item except (TypeError, ValueError): self.wrong_sequence_type("Type %r does not match sequence type of %r" % (self.xsd_type, item)) ### # Namespace prefix reference @method(':', bp=95) def led(self, left): if self.parser.version == '1.0': left.expected('(name)') else: left.expected('(name)', '*') next_token = self.parser.next_token if left.symbol == '(name)': try: namespace = self.parser.namespaces[left.value] except KeyError as err: raise self.error('FONS0004', 'No namespace found for prefix %s' % str(err)) if next_token.symbol not in ('(name)', '*') and next_token.label not in ('function', 'constructor'): next_token.wrong_syntax() elif namespace == XPATH_FUNCTIONS_NAMESPACE: if next_token.label != 'function': next_token.wrong_syntax("An XPath function is expected.") elif isinstance(next_token.label, MultiLabel): next_token.label = 'function' elif namespace == XSD_NAMESPACE: if next_token.symbol not in ('(name)', '*') and next_token.label != 'constructor': next_token.wrong_syntax("An XSD element or a constructor function is expected.") elif isinstance(next_token.label, MultiLabel): next_token.label = 'constructor' elif left.symbol == '*' and next_token.symbol != '(name)': next_token.wrong_syntax() if self.parser.is_spaced(): self.wrong_syntax("a QName cannot contains spaces before or after ':'") self[:] = left, self.parser.expression(90) return self @method(':') def evaluate(self, context=None): if self[0].value == '*': return try: namespace = self.parser.namespaces[self[0].value] except KeyError as err: raise self.error('FONS0004', 'No namespace found for prefix %s' % str(err)) if namespace == XPATH_FUNCTIONS_NAMESPACE and self[1].label != 'function': self[1].wrong_value("Must be a function") elif namespace == XSD_NAMESPACE and self[1].symbol not in ('(name)', '*') and self[1].label != 'constructor': self[1].wrong_value("An XSD element or a constructor function is expected.") return self[1].evaluate(context) @method(':') def select(self, context=None): if self[1].label in ('function', 'constructor'): value = self[1].evaluate(context) if isinstance(value, list): for result in value: yield result else: yield value return elif self[0].value == '*': value = '*:%s' % self[1].value else: try: namespace = self.parser.namespaces[self[0].value] except KeyError as err: raise self.error('FONS0004', 'No namespace found for prefix %s' % str(err)) else: value = '{%s}%s' % (namespace, self[1].value) if context is not None: for item in context.iter_children_or_self(): if is_attribute_node(item, value): yield item[1] elif is_element_node(item, value): yield item ### # Namespace URI as in ElementPath @method('{', bp=95) def nud(self): if self.parser.strict: self.unexpected() namespace = self.parser.next_token.value + self.parser.raw_advance('}') self.parser.advance() next_token = self.parser.next_token if next_token.symbol not in ('(name)', '*') and next_token.label != 'function': next_token.wrong_syntax() elif self.parser.next_token.label != 'function' and namespace == XPATH_FUNCTIONS_NAMESPACE: self.parser.next_token.wrong_syntax() self[:] = self.parser.symbol_table['(string)'](self.parser, namespace), self.parser.expression(90) return self @method('{') def evaluate(self, context=None): if self[1].label == 'function': return self[1].evaluate(context) else: return '{%s}%s' % (self[0].value, self[1].value) @method('{') def select(self, context=None): if self[1].label == 'function': value = self[1].evaluate(context) if isinstance(value, list): for result in value: yield result else: yield value elif context is not None: value = '{%s}%s' % (self[0].value, self[1].value) for item in context.iter_children_or_self(): if is_attribute_node(item, value): yield item[1] elif is_element_node(item, value): yield item ### # Variables @method('$', bp=90) def nud(self): self.parser.next_token.expected('(name)') self[:] = self.parser.expression(rbp=90), if self[0].value.startswith('{'): self[0].wrong_value("Variable reference requires a simple reference name") return self @method('$') def evaluate(self, context=None): varname = self[0].value if varname in self.parser.variables: return self.parser.variables[varname] elif context is None: return elif varname in context.variables: return context.variables[varname] elif isinstance(context, XPathSchemaContext): return else: raise ElementPathNameError('unknown variable', token=self) ### # Nullary operators (use only the context) @method(nullary('*')) def select(self, context=None): if self: # Product operator item = self.evaluate(context) if context is not None: context.item = item yield item elif context is None: self.missing_context() else: # Wildcard literal for item in context.iter_children_or_self(): if context.is_principal_node_kind(): if is_attribute_node(item): yield item[1] else: yield item @method(nullary('.')) def select(self, context=None): if context is None: self.missing_context() elif context.item is not None: yield context.item elif is_document_node(context.root): yield context.root @method(nullary('..')) def select(self, context=None): if context is None: self.missing_context() else: try: parent = context.parent_map[context.item] except KeyError: pass else: if is_element_node(parent): context.item = parent yield parent ### # Logical Operators @method(infix('or', bp=20)) def evaluate(self, context=None): return bool(self[0].evaluate(context) or self[1].evaluate(context)) @method(infix('and', bp=25)) def evaluate(self, context=None): return bool(self[0].evaluate(context) and self[1].evaluate(context)) @method(infix('=', bp=30)) def evaluate(self, context=None): return any(op1 == op2 for op1, op2 in self.get_comparison_data(context)) @method(infix('!=', bp=30)) def evaluate(self, context=None): return any(op1 != op2 for op1, op2 in self.get_comparison_data(context)) @method(infix('<', bp=30)) def evaluate(self, context=None): return any(op1 < op2 for op1, op2 in self.get_comparison_data(context)) @method(infix('>', bp=30)) def evaluate(self, context=None): return any(op1 > op2 for op1, op2 in self.get_comparison_data(context)) @method(infix('<=', bp=30)) def evaluate(self, context=None): return any(op1 <= op2 for op1, op2 in self.get_comparison_data(context)) @method(infix('>=', bp=30)) def evaluate(self, context=None): return any(op1 >= op2 for op1, op2 in self.get_comparison_data(context)) ### # Numerical operators prefix('+') prefix('-', bp=90) @method(infix('+', bp=40)) def evaluate(self, context=None): if not self: return elif len(self) == 1: arg = self.get_argument(context, cls=NumericTypeProxy) if arg is not None: try: return +arg except TypeError: raise self.wrong_type("numeric value is required: %r" % arg) else: op1, op2 = self.get_operands(context, cls=ArithmeticTypeProxy) if op1 is not None: try: return op1 + op2 except TypeError as err: raise self.wrong_type(str(err)) @method(infix('-', bp=40)) def evaluate(self, context=None): if len(self) == 1: arg = self.get_argument(context, cls=NumericTypeProxy) if arg is not None: try: return -arg except TypeError: raise self.wrong_type("numeric value is required: %r" % arg) else: op1, op2 = self.get_operands(context, cls=ArithmeticTypeProxy) if op1 is not None: try: return op1 - op2 except TypeError as err: raise self.wrong_type(str(err)) @method(infix('*', bp=45)) def evaluate(self, context=None): if self: op1, op2 = self.get_operands(context, cls=ArithmeticTypeProxy) if op1 is not None: try: return op1 * op2 except TypeError as err: raise self.wrong_type(str(err)) @method(infix('div', bp=45)) def evaluate(self, context=None): dividend, divisor = self.get_operands(context, cls=ArithmeticTypeProxy) if dividend is None: return elif divisor != 0: try: return dividend / divisor except TypeError as err: raise self.wrong_type(str(err)) elif dividend == 0: return float('nan') elif dividend > 0: return float('inf') else: return float('-inf') @method(infix('mod', bp=45)) def evaluate(self, context=None): op1, op2 = self.get_operands(context, cls=NumericTypeProxy) if op1 is not None: try: return op1 % op2 except TypeError as err: raise self.wrong_type(str(err)) ### # Union expressions @method(infix('|', bp=50)) def select(self, context=None): if context is not None: results = {item for k in range(2) for item in self[k].select(context.copy())} for item in context.iter(): if item in results: yield item ### # Path expressions @method('//', bp=80) @method('/', bp=80) def nud(self): next_token = self.parser.next_token if next_token.symbol == '(end)' and self.symbol == '/': return self elif not self.parser.next_is_path_step_token(): next_token.wrong_syntax() self[:] = self.parser.expression(80), return self @method('//', bp=80) @method('/', bp=80) def led(self, left): if not self.parser.next_is_path_step_token(): self.parser.next_token.wrong_syntax() self[:] = left, self.parser.expression(80) return self @method('/') def select(self, context=None): """ Child path expression. Selects child:: axis as default (when bind to '*' or '(name)'). """ if context is None: return elif not self: if is_document_node(context.root): yield context.root elif len(self) == 1: context.item = None for result in self[0].select(context): yield result else: items = set() left_results = list(self[0].select(context)) context.size = len(left_results) for context.position, context.item in enumerate(left_results): if not is_element_node(context.item): self.wrong_type("left operand must returns element nodes: {}".format(context.item)) for result in self[1].select(context): if is_etree_element(result) or isinstance(result, tuple): if result not in items: yield result items.add(result) else: yield result @method('/') def evaluate(self, context=None): """ General evaluation method for path operators, that may returns the a single value or None. """ if context is not None: selector = iter(self.select(context)) try: value = next(selector) except StopIteration: return else: try: next(selector) except StopIteration: return self.data_value(value) else: self.wrong_context_type("atomized operand is a sequence of length greater than one") @method('//') def select(self, context=None): if context is None: return elif len(self) == 1: for _ in context.iter_descendants(axis='descendant-or-self'): for result in self[0].select(context): yield result else: for elem in self[0].select(context): if not is_element_node(elem): self.wrong_type("left operand must returns element nodes: %r" % elem) for _ in context.iter_descendants(item=elem, axis='descendant-or-self'): for result in self[1].select(context): yield result ### # Predicate filters @method('[', bp=75) def led(self, left): self.parser.next_token.unexpected(']') self[:] = left, self.parser.expression() self.parser.advance(']') return self @method('[') def select(self, context=None): if context is not None: left_results = list(self[0].select(context)) context.size = len(left_results) for context.position, context.item in enumerate(left_results): predicate = list(self[1].select(context.copy())) if len(predicate) == 1 and not isinstance(predicate[0], bool) and \ isinstance(predicate[0], (int, float)): if context.position == predicate[0] - 1: yield context.item elif self.boolean_value(predicate): yield context.item ### # Parenthesized expressions @method('(', bp=100) def nud(self): self.parser.next_token.unexpected(')') self[:] = self.parser.expression(), self.parser.advance(')') return self # Skip self!! (remove a redundant level from selection/evaluation) @method('(') def evaluate(self, context=None): return self[0].evaluate(context) @method('(') def select(self, context=None): return self[0].select(context) ### # Forward Axes @method(axis('self')) def select(self, context=None): if context is not None: for _ in context.iter_self(): for result in self[0].select(context): yield result @method(axis('child')) def select(self, context=None): if context is not None: for _ in context.iter_children_or_self(child_axis=True): for result in self[0].select(context): yield result @method(axis('descendant')) def select(self, context=None): if context is not None: item = context.item for _ in context.iter_descendants(axis=self.symbol): if item is not context.item: for result in self[0].select(context): yield result @method(axis('descendant-or-self')) def select(self, context=None): if context is not None: for _ in context.iter_descendants(axis=self.symbol): for result in self[0].select(context): yield result @method(axis('following-sibling')) def select(self, context=None): if context is not None: if is_element_node(context.item): item = context.item for elem in context.iter_parent(axis=self.symbol): follows = False for child in context.iter_children_or_self(elem, child_axis=True): if follows: for result in self[0].select(context): yield result elif item is child: follows = True @method(axis('following')) def select(self, context=None): if context is not None: descendants = set(context.iter_descendants(axis=self.symbol)) item = context.item follows = False for elem in context.iter_descendants(item=context.root, axis=self.symbol): if follows: if elem not in descendants: for result in self[0].select(context): yield result elif item is elem: follows = True @method('@', bp=80) def nud(self): self[:] = self.parser.expression(rbp=80), if self[0].symbol not in ('*', '(name)', ':'): raise ElementPathSyntaxError("invalid attribute specification for XPath.") return self @method('@') @method(axis('attribute')) def select(self, context=None): if context is None: self.missing_context() for _ in context.iter_attributes(): for result in self[0].select(context): yield result @method(axis('namespace')) def select(self, context=None): if context is not None and is_element_node(context.item): elem = context.item namespaces = self.parser.namespaces for prefix_, uri in namespaces.items(): context.item = NamespaceNode(prefix_, uri) yield context.item if hasattr(elem, 'nsmap'): # Maybe an lxml's Element: don't use parser namespaces for axis. for prefix_, uri in elem.nsmap.items(): if prefix_ not in namespaces: context.item = NamespaceNode(prefix_, uri) yield context.item ### # Reverse Axes @method(axis('parent')) def select(self, context=None): if context is not None: for _ in context.iter_parent(axis=self.symbol): for result in self[0].select(context): yield result @method(axis('ancestor')) def select(self, context=None): if context is not None: results = [ item for _ in context.iter_ancestors(axis=self.symbol) for item in self[0].select(context) ] for result in reversed(results): context.item = result yield result @method(axis('ancestor-or-self')) def select(self, context=None): if context is not None: item = context.item for elem in reversed(list(context.iter_ancestors(axis=self.symbol))): context.item = elem yield elem yield item @method(axis('preceding-sibling')) def select(self, context=None): if context is not None and is_element_node(context.item): item = context.item for parent in context.iter_parent(axis=self.symbol): for child in parent: if child is item: break else: context.item = child for result in self[0].select(context): yield result @method(axis('preceding')) def select(self, context=None): if context is not None and is_element_node(context.item): elem = context.item ancestors = set(context.iter_ancestors(axis=self.symbol)) for e in context.root.iter(): if e is elem: break if e not in ancestors: context.item = e yield e ### # Node types @method(function('node', nargs=0)) def select(self, context=None): if context is not None: for item in context.iter_children_or_self(): if item is None: yield context.root elif is_xpath_node(item): yield item @method(function('processing-instruction', nargs=(0, 1))) def evaluate(self, context=None): if context and is_processing_instruction_node(context.item): return context.item @method(function('comment', nargs=0)) def evaluate(self, context=None): if context and is_comment_node(context.item): return context.item @method(function('text', nargs=0)) def select(self, context=None): if context is not None: for item in context.iter_children_or_self(): if item is None: yield context.root elif is_text_node(item): yield item ### # Node set functions @method(function('last', nargs=0)) def evaluate(self, context=None): return context.size if context is not None else 0 @method(function('position', nargs=0)) def evaluate(self, context=None): return context.position + 1 if context is not None else 0 @method(function('count', nargs=1)) def evaluate(self, context=None): results = self[0].evaluate(context) if isinstance(results, list): return len(results) elif results is not None: return 1 else: return 0 @method(function('id', nargs=1)) def select(self, context=None): if context is not None: value = self[0].evaluate(context) item = context.item if is_element_node(item): for elem in item.iter(): if elem.get(XML_ID) == value: yield elem @method(function('name', nargs=(0, 1))) @method(function('local-name', nargs=(0, 1))) @method(function('namespace-uri', nargs=(0, 1))) def evaluate(self, context=None): name = node_name(self.get_argument(context, default_to_context=True)) if name is None: return '' symbol = self.symbol if symbol == 'name': return qname_to_prefixed(name, self.parser.namespaces) elif not name or name[0] != '{': return name if symbol == 'local-name' else '' elif symbol == 'local-name': return name.split('}')[1] elif symbol == 'namespace-uri': return name.split('}')[0][1:] ### # String functions @method(function('string', nargs=1)) def evaluate(self, context=None): return self.string_value(self.get_argument(context)) @method(function('contains', nargs=2)) def evaluate(self, context=None): arg1 = self.get_argument(context, default='', cls=string_base_type) arg2 = self.get_argument(context, index=1, default='', cls=string_base_type) return arg2 in arg1 @method(function('concat')) def evaluate(self, context=None): return ''.join(self.string_value(self.get_argument(context, index=k)) for k in range(len(self))) @method(function('string-length', nargs=1)) def evaluate(self, context=None): return len(self.get_argument(context, default_to_context=True, default='', cls=string_base_type)) @method(function('normalize-space', nargs=1)) def evaluate(self, context=None): if self.parser.version == '1.0': arg = self.string_value(self.get_argument(context, default_to_context=True, default='')) else: arg = self.get_argument(context, default_to_context=True, default='', cls=string_base_type) return ' '.join(arg.strip().split()) @method(function('starts-with', nargs=2)) def evaluate(self, context=None): arg1 = self.get_argument(context, default='', cls=string_base_type) arg2 = self.get_argument(context, index=1, default='', cls=string_base_type) return arg1.startswith(arg2) @method(function('translate', nargs=3)) def evaluate(self, context=None): arg = self.get_argument(context, default='', cls=string_base_type) map_string = self.get_argument(context, index=1, default='', cls=string_base_type) trans_string = self.get_argument(context, index=2, default='', cls=string_base_type) if not PY3: import string maketrans = getattr(string, 'maketrans') arg = arg.encode('utf-8') map_string = map_string.encode('utf-8') trans_string = trans_string.encode('utf-8') else: maketrans = str.maketrans if len(map_string) == len(trans_string): return arg.translate(maketrans(map_string, trans_string)) elif len(map_string) > len(trans_string): k = len(trans_string) if PY3: return arg.translate(maketrans(map_string[:k], trans_string, map_string[k:])) for c in map_string[k:]: arg = arg.replace(c, '') return arg.translate(maketrans(map_string[:k], trans_string)) else: self.wrong_value("the third argument must have a length less or equal than the second") @method(function('substring', nargs=(2, 3))) def evaluate(self, context=None): item = self.get_argument(context, default='', cls=string_base_type) start = self.get_argument(context, index=1) try: if math.isnan(start) or math.isinf(start): return '' except TypeError: self.wrong_type("the second argument must be xs:numeric") else: start = int(round(start)) - 1 if len(self) == 2: return '' if item is None else item[max(start, 0):] else: length = self.get_argument(context, index=2) try: if math.isnan(length) or length <= 0: return '' except TypeError: self.wrong_type("the third argument must be xs:numeric") if item is None: return '' elif math.isinf(length): return item[max(start, 0):] else: stop = start + int(round(length)) return '' if item is None else item[slice(max(start, 0), max(stop, 0))] @method(function('substring-before', nargs=2)) @method(function('substring-after', nargs=2)) def evaluate(self, context=None): arg1 = self.get_argument(context, default='', cls=string_base_type) arg2 = self.get_argument(context, index=1, default='', cls=string_base_type) if arg1 is None: return '' index = 0 try: index = arg1.find(arg2) except AttributeError: self.wrong_type("the first argument must be a string") except TypeError: self.wrong_type("the second argument must be a string") if self.symbol == 'substring-before': return arg1[:index] else: return arg1[index + len(arg2):] ### # Boolean functions @method(function('boolean', nargs=1)) def evaluate(self, context=None): return self.boolean_value(self[0].get_results(context)) @method(function('not', nargs=1)) def evaluate(self, context=None): return not self.boolean_value(self[0].get_results(context)) @method(function('true', nargs=0)) def evaluate(self, context=None): return True @method(function('false', nargs=0)) def evaluate(self, context=None): return False @method(function('lang', nargs=1)) def evaluate(self, context=None): if context is None: return elif not is_element_node(context.item): return False else: try: lang = context.item.attrib[XML_LANG].strip() except KeyError: for elem in context.iter_ancestor(): if XML_LANG in elem.attrib: lang = elem.attrib[XML_LANG] break else: return False if '-' in lang: lang, _ = lang.split('-') return lang.lower() == self[0].evaluate().lower() ### # Number functions @method(function('number', nargs=(0, 1))) def evaluate(self, context=None): arg = self.get_argument(context, default_to_context=True) try: return float(self.string_value(arg) if is_xpath_node(arg) else arg) except (TypeError, ValueError): return float('nan') @method(function('sum', nargs=(1, 2))) def evaluate(self, context=None): values = [self.number_value(x) if isinstance(x, UntypedAtomic) else x for x in self[0].select(context)] if not values: zero = 0 if len(self) == 1 else self.get_argument(context, index=1) return [] if zero is None else zero elif any(isinstance(x, float) and math.isnan(x) for x in values): return float('nan') if any(isinstance(x, DayTimeDuration) for x in values) or \ all(isinstance(x, YearMonthDuration) for x in values): return sum(values) try: return sum(self.number_value(x) for x in values) except TypeError: if self.parser.version == '1.0': return float('nan') raise self.error('FORG0006') @method(function('ceiling', nargs=1)) @method(function('floor', nargs=1)) def evaluate(self, context=None): arg = self.get_argument(context) if arg is None: return float('nan') if self.parser.version == '1.0' else [] elif is_xpath_node(arg) or self.parser.compatibility_mode: arg = self.number_value(arg) if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)): return arg try: return math.floor(arg) if self.symbol == 'floor' else math.ceil(arg) except TypeError as err: self.wrong_type(str(err)) @method(function('round', nargs=1)) def evaluate(self, context=None): arg = self.get_argument(context) if arg is None: return float('nan') if self.parser.version == '1.0' else [] elif is_xpath_node(arg) or self.parser.compatibility_mode: arg = self.number_value(arg) if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)): return arg try: number = decimal.Decimal(arg) if number > 0: return float(number.quantize(decimal.Decimal('1'), rounding='ROUND_HALF_UP')) elif PY3: return float(round(number)) else: return float(number.quantize(decimal.Decimal('1'), rounding='ROUND_HALF_DOWN')) except TypeError as err: self.wrong_type(str(err)) except decimal.DecimalException as err: self.wrong_value(str(err)) register('(end)') XPath1Parser.build_tokenizer()