Some improvements to parsers and testing
- Added a test script for testing the package (debugging typos)
- Added a helper function for nullary operators to the base Parser
- Added comparative tests for XPath 1.0 against lxml.xpath() (double checks)
This commit is contained in:
parent
c3980243f2
commit
4c8b4d8c8a
|
@ -21,7 +21,7 @@ from .exceptions import (
|
|||
ElementPathError, ElementPathSyntaxError, ElementPathNameError, ElementPathValueError, ElementPathTypeError
|
||||
)
|
||||
from .todp_parser import Token, Parser
|
||||
from .xpath_base import is_etree_element, XPathToken, XPathContext
|
||||
from .xpath_base import is_etree_element, is_xpath_node, XPathToken, XPathContext
|
||||
from .xpath1_parser import XPath1Parser
|
||||
from .xpath2_parser import XPath2Parser
|
||||
|
||||
|
@ -51,12 +51,31 @@ class Selector(object):
|
|||
def namespaces(self):
|
||||
return self.parser.namespaces
|
||||
|
||||
def findall(self, elem):
|
||||
def select(self, elem):
|
||||
context = XPathContext(elem)
|
||||
return list(self.root_token.select(context))
|
||||
results = list(self.root_token.select(context))
|
||||
if len(results) == 1 and self.root_token.label in ('function', 'literal'):
|
||||
return results[0]
|
||||
else:
|
||||
return results
|
||||
|
||||
def iter_select(self, elem):
|
||||
context = XPathContext(elem)
|
||||
return self.root_token.select(context)
|
||||
|
||||
|
||||
def select(elem, path, namespaces=None, schema=None, parser=XPath2Parser):
|
||||
parser = parser(namespaces, schema)
|
||||
root_token = parser.parse(path)
|
||||
context = XPathContext(elem)
|
||||
results = list(root_token.select(context))
|
||||
if len(results) == 1 and root_token.label in ('function', 'literal'):
|
||||
return results[0]
|
||||
else:
|
||||
return results
|
||||
|
||||
|
||||
def iter_select(elem, path, namespaces=None, schema=None, parser=XPath2Parser):
|
||||
parser = parser(namespaces, schema)
|
||||
root_token = parser.parse(path)
|
||||
context = XPathContext(elem)
|
||||
|
|
|
@ -403,6 +403,12 @@ class Parser(object):
|
|||
|
||||
return cls.register(symbol, label='literal', lbp=bp, evaluate=evaluate, nud=nud)
|
||||
|
||||
@classmethod
|
||||
def nullary(cls, symbol, bp=0):
|
||||
def nud(self):
|
||||
return self
|
||||
return cls.register(symbol, label='operator', lbp=bp, nud=nud)
|
||||
|
||||
@classmethod
|
||||
def prefix(cls, symbol, bp=0):
|
||||
def nud(self):
|
||||
|
@ -427,8 +433,6 @@ class Parser(object):
|
|||
@classmethod
|
||||
def postfix(cls, symbol, bp=0):
|
||||
def led(self, left):
|
||||
import pdb
|
||||
pdb.set_trace()
|
||||
self[0:] = left,
|
||||
return self
|
||||
return cls.register(symbol, label='operator', lbp=bp, rbp=bp, led=led)
|
||||
|
|
|
@ -13,8 +13,9 @@ from __future__ import division
|
|||
from .exceptions import ElementPathSyntaxError, ElementPathTypeError, ElementPathValueError
|
||||
from .todp_parser import Parser
|
||||
from .xpath_base import (
|
||||
XML_ID_ATTRIBUTE, XPathToken, is_etree_element, is_xpath_node, is_element_node,
|
||||
is_comment_node, is_processing_instruction_node, is_attribute_node, is_text_node
|
||||
XML_ID_ATTRIBUTE, XPathToken, qname_to_prefixed, is_etree_element, is_xpath_node,
|
||||
is_element_node, is_document_node, is_comment_node, is_processing_instruction_node,
|
||||
is_attribute_node, is_text_node
|
||||
)
|
||||
|
||||
|
||||
|
@ -45,11 +46,6 @@ class XPath1Parser(Parser):
|
|||
'string-length', 'normalize-space', 'translate',
|
||||
'boolean', 'not', 'true', 'false' # Boolean functions
|
||||
)
|
||||
RELATIVE_PATH_SYMBOLS = {'descendant-or-self', 'following-sibling', 'preceding-sibling',
|
||||
'ancestor-or-self', 'descendant', 'attribute', 'following',
|
||||
'namespace', 'preceding', 'ancestor', 'parent', 'child', 'self'} | {
|
||||
'(integer)', '(string)', '(float)', '(decimal)', '(name)', '*', '@', '..', '.', '(', '/'
|
||||
}
|
||||
|
||||
def __init__(self, namespaces=None, schema=None):
|
||||
super(XPath1Parser, self).__init__()
|
||||
|
@ -75,8 +71,7 @@ class XPath1Parser(Parser):
|
|||
try:
|
||||
pattern = axis_pattern_template % symbol.strip()
|
||||
except AttributeError:
|
||||
pattern = axis_pattern_template % symbol.symbol
|
||||
|
||||
pattern = axis_pattern_template % getattr(symbol, 'symbol')
|
||||
return cls.register(symbol, pattern=pattern, label='axis', lbp=bp, rbp=bp, nud=nud_)
|
||||
|
||||
@classmethod
|
||||
|
@ -121,12 +116,11 @@ class XPath1Parser(Parser):
|
|||
self.value = self.evaluate() # Static context evaluation
|
||||
return self
|
||||
|
||||
pattern_template = '\\b%s(?=\s*\\()'
|
||||
function_pattern_template = '\\b%s(?=\s*\\()'
|
||||
try:
|
||||
pattern = pattern_template % symbol.strip()
|
||||
pattern = function_pattern_template % symbol.strip()
|
||||
except AttributeError:
|
||||
pattern = pattern_template % symbol.symbol
|
||||
|
||||
pattern = function_pattern_template % getattr(symbol, 'symbol')
|
||||
return cls.register(symbol, pattern=pattern, label='function', lbp=bp, rbp=bp, nud=nud_)
|
||||
|
||||
def map_reference(self, ref):
|
||||
|
@ -167,6 +161,7 @@ XPath1Parser.begin()
|
|||
|
||||
register = XPath1Parser.register
|
||||
literal = XPath1Parser.literal
|
||||
nullary = XPath1Parser.nullary
|
||||
prefix = XPath1Parser.prefix
|
||||
infix = XPath1Parser.infix
|
||||
postfix = XPath1Parser.postfix
|
||||
|
@ -192,7 +187,7 @@ literal('(decimal)')
|
|||
literal('(integer)')
|
||||
|
||||
|
||||
@method('(name)', bp=10)
|
||||
@method(literal('(name)', bp=10))
|
||||
def nud(self):
|
||||
if self.value[0] != '{' and ':' in self.value:
|
||||
self.value = self.parser.map_reference(self.value)
|
||||
|
@ -219,42 +214,6 @@ def select(self, context):
|
|||
yield context.item
|
||||
|
||||
|
||||
@method(literal('*'))
|
||||
def select(self, context):
|
||||
if not self:
|
||||
# Wildcard literal
|
||||
if context.active_iterator is None:
|
||||
for child in context.iter_children():
|
||||
if is_element_node(child):
|
||||
yield child
|
||||
elif context.principal_node_kind:
|
||||
if is_attribute_node(context.item):
|
||||
yield context.item[1]
|
||||
else:
|
||||
yield context.item
|
||||
else:
|
||||
# Product operator
|
||||
context.item = self[0].evaluate(context)
|
||||
yield context.item
|
||||
|
||||
|
||||
@method(literal('.'))
|
||||
def select(self, context):
|
||||
yield context.item if context.item is not None else context.root
|
||||
|
||||
|
||||
@method(literal('..'))
|
||||
def select(self, context):
|
||||
try:
|
||||
parent = context.parent_map[context.item]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
if is_element_node(parent):
|
||||
context.item = parent
|
||||
yield parent
|
||||
|
||||
|
||||
###
|
||||
# Comments
|
||||
@method(literal('(:'))
|
||||
|
@ -296,6 +255,250 @@ def evaluate(self, context=None):
|
|||
self.wrong_name('unknown variable')
|
||||
|
||||
|
||||
###
|
||||
# Nullary operators (use only the context)
|
||||
@method(nullary('*'))
|
||||
def select(self, context):
|
||||
if not self:
|
||||
# Wildcard literal
|
||||
if context.active_iterator is None:
|
||||
for child in context.iter_children():
|
||||
if is_element_node(child):
|
||||
yield child
|
||||
elif context.principal_node_kind:
|
||||
if is_attribute_node(context.item):
|
||||
yield context.item[1]
|
||||
else:
|
||||
yield context.item
|
||||
else:
|
||||
# Product operator
|
||||
context.item = self[0].evaluate(context)
|
||||
yield context.item
|
||||
|
||||
|
||||
@method(nullary('.'))
|
||||
def select(self, context):
|
||||
if context.item is not None:
|
||||
yield context.item
|
||||
elif is_document_node(context.root):
|
||||
yield context.root
|
||||
|
||||
|
||||
@method(nullary('..'))
|
||||
def select(self, context):
|
||||
try:
|
||||
parent = context.parent_map[context.item]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
if is_element_node(parent):
|
||||
context.item = parent
|
||||
yield parent
|
||||
|
||||
|
||||
###
|
||||
# Logical Operators
|
||||
@method(infix('or', bp=20))
|
||||
def evaluate(self, context=None):
|
||||
return bool(self[0].evaluate(context) or self[1].evaluate(context))
|
||||
|
||||
|
||||
@method(infix('and', bp=25))
|
||||
def evaluate(self, context=None):
|
||||
return bool(self[0].evaluate(context) and self[1].evaluate(context))
|
||||
|
||||
|
||||
@method(infix('=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) == self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('!=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) != self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('<', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) < self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('>', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) > self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('<=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) <= self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('>=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) >= self[1].evaluate(context)
|
||||
|
||||
|
||||
###
|
||||
# Numerical operators
|
||||
prefix('+')
|
||||
prefix('-', bp=90)
|
||||
|
||||
|
||||
@method(infix('+', bp=40))
|
||||
def evaluate(self, context=None):
|
||||
if len(self) > 1:
|
||||
try:
|
||||
return self[0].evaluate(context) + self[1].evaluate(context)
|
||||
except TypeError:
|
||||
raise ElementPathTypeError("a numeric value is required: %r." % self[0])
|
||||
else:
|
||||
try:
|
||||
return +self[0].evaluate(context)
|
||||
except TypeError:
|
||||
raise ElementPathTypeError("numeric values are required: %r." % self[:])
|
||||
|
||||
|
||||
@method(infix('-', bp=40))
|
||||
def evaluate(self, context=None):
|
||||
try:
|
||||
try:
|
||||
return self[0].evaluate(context) - self[1].evaluate(context)
|
||||
except TypeError:
|
||||
self.wrong_type("values must be numeric: %r" % [tk.evaluate(context) for tk in self])
|
||||
except IndexError:
|
||||
try:
|
||||
return -self[0].evaluate(context)
|
||||
except TypeError:
|
||||
self.wrong_type("value must be numeric: %r" % self[0].evaluate(context))
|
||||
|
||||
|
||||
@method(infix('*', bp=45))
|
||||
def evaluate(self, context=None):
|
||||
if self:
|
||||
return self[0].evaluate(context) * self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('div', bp=45))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) / self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('mod', bp=45))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) % self[1].evaluate(context)
|
||||
|
||||
|
||||
###
|
||||
# Union expressions
|
||||
@method(infix('|', bp=50))
|
||||
def select(self, context):
|
||||
results = {self.filter_node(elem) for k in range(2) for elem in self[k].select(context)}
|
||||
for elem in self.root.iter():
|
||||
if elem in results:
|
||||
context.item = elem
|
||||
yield elem
|
||||
|
||||
|
||||
###
|
||||
# Path expressions
|
||||
@method('//', bp=80)
|
||||
@method('/', bp=80)
|
||||
def nud(self):
|
||||
next_token = self.parser.next_token
|
||||
if not self.parser.source_first:
|
||||
self.wrong_symbol()
|
||||
elif next_token.symbol == '(end)' and self.symbol == '/':
|
||||
return self
|
||||
elif not self.parser.next_token.is_path_step_token():
|
||||
next_token.wrong_symbol()
|
||||
self[0:] = self.parser.expression(80),
|
||||
return self
|
||||
|
||||
|
||||
@method('//', bp=80)
|
||||
@method('/', bp=80)
|
||||
def led(self, left):
|
||||
if not self.parser.next_token.is_path_step_token():
|
||||
self.parser.next_token.wrong_symbol()
|
||||
self[0:1] = left, self.parser.expression(80)
|
||||
return self
|
||||
|
||||
|
||||
@method('/')
|
||||
def select(self, context):
|
||||
"""
|
||||
Child path expression. Selects child:: axis as default (when bind to '*' or '(name)').
|
||||
"""
|
||||
if not self:
|
||||
if is_document_node(context.root):
|
||||
yield context.root
|
||||
elif len(self) == 1:
|
||||
context.item = None
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
else:
|
||||
items = set()
|
||||
for elem in self[0].select(context):
|
||||
if not is_element_node(elem):
|
||||
self.wrong_type("left operand must returns element nodes: %r" % elem)
|
||||
for result in self[1].select(context.copy(item=elem)):
|
||||
if is_etree_element(result) or isinstance(result, tuple):
|
||||
if result not in items:
|
||||
yield result
|
||||
items.add(result)
|
||||
else:
|
||||
yield result
|
||||
|
||||
|
||||
@method('//')
|
||||
def select(self, context):
|
||||
if len(self) == 1:
|
||||
for _ in context.iter_descendants():
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
else:
|
||||
for elem in self[0].select(context):
|
||||
if not is_element_node(elem):
|
||||
self.wrong_type("left operand must returns element nodes: %r" % elem)
|
||||
for _ in context.iter_descendants(item=elem):
|
||||
for result in self[1].select(context):
|
||||
yield result
|
||||
|
||||
|
||||
###
|
||||
# Parenthesized expressions
|
||||
@method('(', bp=90)
|
||||
def nud(self):
|
||||
self.parser.next_token.unexpected(')')
|
||||
self[0:] = self.parser.expression(),
|
||||
self.parser.advance(')')
|
||||
return self[0]
|
||||
|
||||
|
||||
###
|
||||
# Predicate filters
|
||||
@method('[', bp=90)
|
||||
def led(self, left):
|
||||
self.parser.next_token.unexpected(']')
|
||||
self[0:1] = left, self.parser.expression()
|
||||
self.parser.advance(']')
|
||||
return self
|
||||
|
||||
|
||||
@method('[')
|
||||
def select(self, context):
|
||||
for result in self[0].select(context):
|
||||
predicate = list(self[1].select(context.copy()))
|
||||
if len(predicate) == 1 and not isinstance(predicate[0], bool) and \
|
||||
isinstance(predicate[0], (int, float)):
|
||||
if context.position == predicate[0] - 1:
|
||||
context.item = result
|
||||
yield result
|
||||
elif self.boolean(predicate):
|
||||
context.item = result
|
||||
yield result
|
||||
|
||||
|
||||
###
|
||||
# Forward Axes
|
||||
@method(axis('self', bp=80))
|
||||
|
@ -331,18 +534,14 @@ def select(self, context):
|
|||
@method(axis('following-sibling', bp=80))
|
||||
def select(self, context):
|
||||
if is_element_node(context.item):
|
||||
elem = context.item
|
||||
try:
|
||||
parent = context.parent_map[elem]
|
||||
except KeyError:
|
||||
return
|
||||
else:
|
||||
item = context.item
|
||||
for _ in context.iter_parent():
|
||||
follows = False
|
||||
for item in context.iter_children(item=parent):
|
||||
for child in context.iter_children():
|
||||
if follows:
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
elif item is elem:
|
||||
elif item is child:
|
||||
follows = True
|
||||
|
||||
|
||||
|
@ -361,7 +560,6 @@ def select(self, context):
|
|||
|
||||
|
||||
@method('@', bp=80)
|
||||
@method('attribute', bp=80)
|
||||
def nud(self):
|
||||
self[0:] = self.parser.expression(rbp=80),
|
||||
if self[0].symbol not in ('*', '(name)'):
|
||||
|
@ -370,7 +568,7 @@ def nud(self):
|
|||
|
||||
|
||||
@method('@')
|
||||
@method(axis('attribute'))
|
||||
@method(axis('attribute', bp=80))
|
||||
def select(self, context):
|
||||
for _ in context.iter_attributes():
|
||||
for result in self[0].select(context):
|
||||
|
@ -381,7 +579,7 @@ def select(self, context):
|
|||
def select(self, context):
|
||||
if is_element_node(context.item):
|
||||
element_class = context.item.__class__
|
||||
for prefix_, uri in sorted(self.parser.namespaces.items()):
|
||||
for prefix_, uri in self.parser.namespaces.items():
|
||||
context.item = element_class(tag=prefix_, text=uri)
|
||||
yield context.item
|
||||
|
||||
|
@ -390,12 +588,8 @@ def select(self, context):
|
|||
# Reverse Axes
|
||||
@method(axis('parent', bp=80))
|
||||
def select(self, context):
|
||||
try:
|
||||
parent = context.parent_map[context.item]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
for result in self[0].select(context.copy(item=parent)):
|
||||
for _ in context.iter_parent():
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
|
||||
|
||||
|
@ -419,17 +613,15 @@ def select(self, context):
|
|||
@method(axis('preceding-sibling', bp=80))
|
||||
def select(self, context):
|
||||
if is_element_node(context.item):
|
||||
elem = context.item
|
||||
try:
|
||||
parent = context.parent_map[elem]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
item = context.item
|
||||
for parent in context.iter_parent():
|
||||
for child in parent:
|
||||
if child is elem:
|
||||
if child is item:
|
||||
break
|
||||
context.item = child
|
||||
yield child
|
||||
else:
|
||||
context.item = child
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
|
||||
|
||||
@method(axis('preceding', bp=80))
|
||||
|
@ -528,30 +720,38 @@ def select(self, context):
|
|||
|
||||
|
||||
@method(function('name', nargs=(0, 1), bp=90))
|
||||
def evaluate(self, context=None):
|
||||
try:
|
||||
return self.name(self[0].evaluate(context))
|
||||
except IndexError:
|
||||
if context is None:
|
||||
self.missing_context()
|
||||
return self.name(context.item)
|
||||
|
||||
|
||||
@method(function('local-name', nargs=(0, 1), bp=90))
|
||||
@method(function('namespace-uri', nargs=(0, 1), bp=90))
|
||||
def evaluate(self, context=None):
|
||||
try:
|
||||
name = self.name(self[0])
|
||||
except IndexError:
|
||||
if context is None:
|
||||
self.missing_context()
|
||||
if context is None:
|
||||
return
|
||||
elif not self:
|
||||
name = self.name(context.item)
|
||||
if name[0] != '{':
|
||||
return name
|
||||
else:
|
||||
try:
|
||||
selector = iter(self[0].select(context))
|
||||
item = next(selector)
|
||||
except StopIteration:
|
||||
name = ''
|
||||
else:
|
||||
name = self.name(item)
|
||||
if self.parser.version > '1.0':
|
||||
try:
|
||||
next(selector)
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
self.wrong_value("a sequence of more than one item is not allowed as argument")
|
||||
|
||||
symbol = self.symbol
|
||||
if symbol == 'name':
|
||||
return qname_to_prefixed(name, self.parser.namespaces)
|
||||
elif not name or name[0] != '{':
|
||||
return name if symbol == 'local-name' else ''
|
||||
elif symbol == 'local-name':
|
||||
return name.split('}')[1]
|
||||
|
||||
|
||||
function('namespace-uri', nargs=1, bp=90)
|
||||
elif symbol == 'namespace-uri':
|
||||
return name.split('}')[0][1:]
|
||||
|
||||
|
||||
###
|
||||
|
@ -677,205 +877,4 @@ def evaluate(self, context=None):
|
|||
return False
|
||||
|
||||
|
||||
###
|
||||
# Logical Operators
|
||||
@method(infix('or', bp=20))
|
||||
def evaluate(self, context=None):
|
||||
return bool(self[0].evaluate(context) or self[1].evaluate(context))
|
||||
|
||||
|
||||
@method(infix('and', bp=25))
|
||||
def evaluate(self, context=None):
|
||||
return bool(self[0].evaluate(context) and self[1].evaluate(context))
|
||||
|
||||
|
||||
@method(infix('=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) == self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('!=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) != self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('<', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) < self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('>', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) > self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('<=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) <= self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('>=', bp=30))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) >= self[1].evaluate(context)
|
||||
|
||||
|
||||
###
|
||||
# Numerical operators
|
||||
prefix('+')
|
||||
prefix('-', bp=90)
|
||||
|
||||
|
||||
@method(infix('+', bp=40))
|
||||
def evaluate(self, context=None):
|
||||
if len(self) > 1:
|
||||
try:
|
||||
return self[0].evaluate(context) + self[1].evaluate(context)
|
||||
except TypeError:
|
||||
raise ElementPathTypeError("a numeric value is required: %r." % self[0])
|
||||
else:
|
||||
try:
|
||||
return +self[0].evaluate(context)
|
||||
except TypeError:
|
||||
raise ElementPathTypeError("numeric values are required: %r." % self[:])
|
||||
|
||||
|
||||
@method(infix('-', bp=40))
|
||||
def evaluate(self, context=None):
|
||||
try:
|
||||
try:
|
||||
return self[0].evaluate(context) - self[1].evaluate(context)
|
||||
except TypeError:
|
||||
self.wrong_type("values must be numeric: %r" % [tk.evaluate(context) for tk in self])
|
||||
except IndexError:
|
||||
try:
|
||||
return -self[0].evaluate(context)
|
||||
except TypeError:
|
||||
self.wrong_type("value must be numeric: %r" % self[0].evaluate(context))
|
||||
|
||||
|
||||
@method(infix('*', bp=45))
|
||||
def evaluate(self, context=None):
|
||||
if self:
|
||||
return self[0].evaluate(context) * self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('div', bp=45))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) / self[1].evaluate(context)
|
||||
|
||||
|
||||
@method(infix('mod', bp=45))
|
||||
def evaluate(self, context=None):
|
||||
return self[0].evaluate(context) % self[1].evaluate(context)
|
||||
|
||||
|
||||
###
|
||||
# Union expressions
|
||||
@method(infix('|', bp=50))
|
||||
def select(self, context):
|
||||
results = {self.filter_node(elem) for k in range(2) for elem in self[k].select(context)}
|
||||
for elem in self.root.iter():
|
||||
if elem in results:
|
||||
context.item = elem
|
||||
yield elem
|
||||
|
||||
|
||||
###
|
||||
# Path expressions
|
||||
@method('//', bp=80)
|
||||
@method('/', bp=80)
|
||||
def nud(self):
|
||||
if not self.parser.source_first:
|
||||
self.wrong_symbol()
|
||||
elif self.parser.next_token.symbol == '(end)' and self.symbol == '/':
|
||||
return self
|
||||
elif self.parser.next_token.symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
|
||||
self.parser.next_token.wrong_symbol()
|
||||
self[0:] = self.parser.expression(80),
|
||||
return self
|
||||
|
||||
|
||||
@method('//', bp=80)
|
||||
@method('/', bp=80)
|
||||
def led(self, left):
|
||||
if self.parser.next_token.symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
|
||||
self.parser.next_token.wrong_symbol()
|
||||
self[0:1] = left, self.parser.expression(80)
|
||||
return self
|
||||
|
||||
|
||||
@method('/')
|
||||
def select(self, context):
|
||||
"""
|
||||
Child path expression. Selects child:: axis as default (when bind to '*' or '(name)').
|
||||
"""
|
||||
if not self:
|
||||
yield context.root
|
||||
elif len(self) == 1:
|
||||
context.item = None
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
else:
|
||||
items = set()
|
||||
for elem in self[0].select(context):
|
||||
if not is_element_node(elem):
|
||||
self.wrong_type("left operand must returns element nodes: %r" % elem)
|
||||
for result in self[1].select(context.copy(item=elem)):
|
||||
if is_etree_element(result) or isinstance(result, tuple):
|
||||
if result not in items:
|
||||
yield result
|
||||
items.add(result)
|
||||
else:
|
||||
yield result
|
||||
|
||||
|
||||
@method('//')
|
||||
def select(self, context):
|
||||
if len(self) == 1:
|
||||
for _ in context.iter_descendants():
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
else:
|
||||
for elem in self[0].select(context):
|
||||
if not is_element_node(elem):
|
||||
self.wrong_type("left operand must returns element nodes: %r" % elem)
|
||||
for _ in context.iter_descendants(item=elem):
|
||||
for result in self[1].select(context):
|
||||
yield result
|
||||
|
||||
|
||||
###
|
||||
# Parenthesized expressions
|
||||
@method('(', bp=90)
|
||||
def nud(self):
|
||||
self.parser.next_token.unexpected(')')
|
||||
self[0:] = self.parser.expression(),
|
||||
self.parser.advance(')')
|
||||
return self[0]
|
||||
|
||||
|
||||
###
|
||||
# Predicate filters
|
||||
@method('[', bp=90)
|
||||
def led(self, left):
|
||||
self.parser.next_token.unexpected(']')
|
||||
self[0:1] = left, self.parser.expression()
|
||||
self.parser.advance(']')
|
||||
return self
|
||||
|
||||
|
||||
@method('[')
|
||||
def select(self, context):
|
||||
for result in self[0].select(context):
|
||||
predicate = list(self[1].select(context.copy()))
|
||||
if len(predicate) == 1 and not isinstance(predicate[0], bool) and \
|
||||
isinstance(predicate[0], (int, float)):
|
||||
if context.position == predicate[0] - 1:
|
||||
context.item = result
|
||||
yield result
|
||||
elif self.boolean(predicate):
|
||||
context.item = result
|
||||
yield result
|
||||
|
||||
|
||||
XPath1Parser.end()
|
||||
|
|
|
@ -29,7 +29,6 @@ class XPath2Parser(XPath1Parser):
|
|||
# XPath 2.0 added functions
|
||||
'document-node', # Node test functions
|
||||
)
|
||||
RELATIVE_PATH_SYMBOLS = XPath1Parser.RELATIVE_PATH_SYMBOLS | {s for s in SYMBOLS if s.endswith("::")}
|
||||
|
||||
RESERVED_FUNCTIONS = {
|
||||
'attribute(', 'comment(', 'document-node(', 'element(', 'empty-sequence(', 'if', 'item', 'node(',
|
||||
|
|
|
@ -11,12 +11,42 @@
|
|||
import sys
|
||||
from .exceptions import ElementPathTypeError, ElementPathValueError
|
||||
from .todp_parser import Token
|
||||
import re
|
||||
|
||||
|
||||
_RE_MATCH_NAMESPACE = re.compile(r'{([^}]*)}')
|
||||
|
||||
|
||||
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
||||
XML_ID_ATTRIBUTE = '{%s}id' % XML_NAMESPACE
|
||||
|
||||
|
||||
def get_namespace(name):
|
||||
try:
|
||||
return _RE_MATCH_NAMESPACE.match(name).group(1)
|
||||
except (AttributeError, TypeError):
|
||||
return ''
|
||||
|
||||
|
||||
def qname_to_prefixed(qname, namespaces):
|
||||
"""
|
||||
Transforms a fully qualified name into a prefixed reference using a namespace map.
|
||||
|
||||
:param qname: a fully qualified name or a local name.
|
||||
:param namespaces: Dictionary with the map from prefixes to namespace URIs.
|
||||
:return: String with a prefixed or local reference.
|
||||
"""
|
||||
qname_uri = get_namespace(qname)
|
||||
for prefix, uri in sorted(namespaces.items(), reverse=True):
|
||||
if uri != qname_uri:
|
||||
continue
|
||||
if prefix:
|
||||
return qname.replace(u'{%s}' % uri, u'%s:' % prefix)
|
||||
else:
|
||||
return qname.replace(u'{%s}' % uri, '')
|
||||
return qname
|
||||
|
||||
|
||||
###
|
||||
# XPath node types test functions
|
||||
#
|
||||
|
@ -130,6 +160,11 @@ class XPathToken(Token):
|
|||
else:
|
||||
self.wrong_type("an XPath node required: %r" % value)
|
||||
|
||||
def is_path_step_token(self):
|
||||
return self.label == 'axis' or self.symbol in {
|
||||
'(integer)', '(string)', '(float)', '(decimal)', '(name)', '*', '@', '..', '.', '(', '/'
|
||||
}
|
||||
|
||||
# Errors
|
||||
def missing_context(self):
|
||||
raise ElementPathValueError("%s: dynamic context required for evaluate." % self)
|
||||
|
@ -214,6 +249,19 @@ class XPathContext(object):
|
|||
self.item, self.size, self.position, self._iterator = status
|
||||
self._node_kind_test = is_element_node
|
||||
|
||||
def iter_parent(self):
|
||||
status = self.item, self.size, self.position, self._iterator
|
||||
self._iterator, self._node_kind_test = self.iter_parent, is_element_node
|
||||
|
||||
try:
|
||||
self.item = self.parent_map[self.item]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
yield self.item
|
||||
|
||||
self.item, self.size, self.position, self._iterator = status
|
||||
|
||||
def iter_descendants(self, item=None):
|
||||
def _iter_descendants():
|
||||
elem = self.item
|
||||
|
|
|
@ -59,11 +59,14 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.assertEqual(list(self.parser.parse(path).select(context)), expected)
|
||||
|
||||
def check_select(self, path, root, expected, namespaces=None, schema=None):
|
||||
selector = select(root, path, namespaces, schema, self.parser.__class__)
|
||||
if isinstance(expected, type) and issubclass(expected, Exception):
|
||||
self.assertRaises(expected, list, selector)
|
||||
self.assertRaises(expected, select, root, path, namespaces, schema, self.parser.__class__)
|
||||
else:
|
||||
self.assertEqual(list(selector), expected)
|
||||
results = select(root, path, namespaces, schema, self.parser.__class__)
|
||||
if isinstance(expected, set):
|
||||
self.assertEqual(set(results), expected)
|
||||
else:
|
||||
self.assertEqual(results, expected)
|
||||
|
||||
def wrong_syntax(self, path):
|
||||
self.assertRaises(ElementPathSyntaxError, self.parser.parse, path)
|
||||
|
@ -114,11 +117,37 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.check_tokenizer("last (:", ['last', '', '(:'])
|
||||
|
||||
def test_tokens(self):
|
||||
# Literals
|
||||
self.check_token('(string)', 'literal', "'hello' string",
|
||||
"token(symbol='(string)', value='hello')", 'hello')
|
||||
self.check_token('(integer)', 'literal', "1999 integer",
|
||||
"token(symbol='(integer)', value=1999)", 1999)
|
||||
self.check_token('(float)', 'literal', "3.1415 float",
|
||||
"token(symbol='(float)', value=3.1415)", 3.1415)
|
||||
self.check_token('(decimal)', 'literal', "217.35 decimal",
|
||||
"token(symbol='(decimal)', value=217.35)", 217.35)
|
||||
self.check_token('(name)', 'literal', "'schema' name",
|
||||
"token(symbol='(name)', value='schema')", 'schema')
|
||||
|
||||
# Axes
|
||||
self.check_token('self', 'axis', "self axis", "token(symbol='self')")
|
||||
self.check_token('child', 'axis', "child axis", "token(symbol='child')")
|
||||
self.check_token('parent', 'axis', "parent axis", "token(symbol='parent')")
|
||||
self.check_token('ancestor', 'axis', "ancestor axis", "token(symbol='ancestor')")
|
||||
self.check_token('preceding', 'axis', "preceding axis", "token(symbol='preceding')")
|
||||
self.check_token('descendant-or-self', 'axis', "descendant-or-self axis")
|
||||
self.check_token('following-sibling', 'axis', "following-sibling axis")
|
||||
self.check_token('preceding-sibling', 'axis', "preceding-sibling axis")
|
||||
self.check_token('ancestor-or-self', 'axis', "ancestor-or-self axis")
|
||||
self.check_token('descendant', 'axis', "descendant axis")
|
||||
self.check_token('attribute', 'axis', "attribute axis")
|
||||
self.check_token('following', 'axis', "following axis")
|
||||
self.check_token('namespace', 'axis', "namespace axis")
|
||||
|
||||
# Functions
|
||||
self.check_token('position', 'function', "position() function", "token(symbol='position')")
|
||||
|
||||
# Operators
|
||||
self.check_token('and', 'operator', "'and' operator", "token(symbol='and')")
|
||||
|
||||
def test_implementation(self):
|
||||
|
@ -128,7 +157,7 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.check_tree('child::B1', '(child (B1))')
|
||||
self.check_tree('A/B//C/D', '(/ (// (/ (A) (B)) (C)) (D))')
|
||||
self.check_tree('child::*/child::B1', '(/ (child (*)) (child (B1)))')
|
||||
self.check_tree('attribute::name="Galileo"', '(attribute (= (name) (Galileo)))')
|
||||
self.check_tree('attribute::name="Galileo"', '(= (attribute (name)) (Galileo))')
|
||||
self.check_tree('1 + 2 * 3', '(+ (1) (* (2) (3)))')
|
||||
self.check_tree('(1 + 2) * 3', '(* (+ (1) (2)) (3))')
|
||||
self.check_tree("false() and true()", '(and (False) (True))')
|
||||
|
@ -183,9 +212,23 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.check_value("last()", 0)
|
||||
self.check_value("last()", 3, context=context)
|
||||
self.check_value("last()-1", 2, context=context)
|
||||
|
||||
self.check_value("count((0, 1, 2 + 1, 3 - 1))", 4)
|
||||
self.check_select("count(5)", root, [1])
|
||||
# self.check_select("name(A)", root, [1])
|
||||
|
||||
self.check_select("name(.)", root, 'A')
|
||||
self.check_select("name(A)", root, '')
|
||||
self.check_select("local-name(A)", root, '')
|
||||
self.check_select("namespace-uri(A)", root, '')
|
||||
self.check_select("name(B2)", root, 'B2')
|
||||
self.check_select("local-name(B2)", root, 'B2')
|
||||
self.check_select("namespace-uri(B2)", root, '')
|
||||
if self.parser.version <= '1.0':
|
||||
self.check_select("name(*)", root, 'B1')
|
||||
root = self.etree.XML('<tst:A xmlns:tst="http://xpath.test/ns"/>')
|
||||
self.check_select("name(.)", root, 'tst:A', namespaces={'tst': "http://xpath.test/ns"})
|
||||
self.check_select("local-name(.)", root, 'A')
|
||||
self.check_select("namespace-uri(.)", root, 'http://xpath.test/ns')
|
||||
# self.check_select("name(tst:A)", root, ['{http://xpath.test/ns}A'])
|
||||
|
||||
def test_string_functions(self):
|
||||
self.check_value("string(10.0)", '10.0')
|
||||
|
@ -267,7 +310,7 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
|
||||
def test_child_operator(self):
|
||||
root = self.etree.XML('<A><B1><C1/></B1><B2/><B3><C1/><C2/></B3></A>')
|
||||
self.check_select('/', root, [root]) # root element is the child of the document
|
||||
self.check_select('/', root, []) # a root element is not a document!
|
||||
self.check_select('/B1', root, [])
|
||||
self.check_select('/A1', root, [])
|
||||
self.check_select('/A', root, [root])
|
||||
|
@ -284,7 +327,7 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
def test_context_item_expression(self):
|
||||
root = self.etree.XML('<A><B1><C/></B1><B2/><B3><C1/><C2/></B3></A>')
|
||||
self.check_select('.', root, [root])
|
||||
self.check_select('/././.', root, [root])
|
||||
self.check_select('/././.', root, [])
|
||||
self.check_select('/A/.', root, [root])
|
||||
self.check_select('/A/B1/.', root, [root[0]])
|
||||
self.check_select('/A/B1/././.', root, [root[0]])
|
||||
|
@ -333,7 +376,7 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.check_select('/A/B1/attribute::*', root, ['beta1'])
|
||||
self.check_select('/A/B1/@*', root, ['beta1'])
|
||||
self.check_select('/A/B3/attribute::*', root, ['beta2', 'beta3'])
|
||||
self.check_select('/A/attribute::*', root, ['alpha', '1']) # sorted by attribute name
|
||||
self.check_select('/A/attribute::*', root, {'1', 'alpha'})
|
||||
|
||||
def test_following_axis(self):
|
||||
root = self.etree.XML('<A><B1><C1/></B1><B2/><B3><C1/><C2/></B3><B4><C1><D1/></C1></B4></A>')
|
||||
|
@ -363,7 +406,7 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2></A>')
|
||||
self.check_select('/A/B1/C2/preceding-sibling::*', root, [root[0][0]])
|
||||
self.check_select('/A/B2/C4/preceding-sibling::*', root, [root[1][0], root[1][1], root[1][2]])
|
||||
self.check_select('/A/B1/C2/preceding-sibling::C3', root, [root[0][0]])
|
||||
self.check_select('/A/B1/C2/preceding-sibling::C3', root, [])
|
||||
|
||||
def test_preceding_axis(self):
|
||||
root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2></A>')
|
||||
|
@ -398,13 +441,17 @@ class XPath2ParserTest(XPath1ParserTest):
|
|||
|
||||
def test_boolean_functions2(self):
    # Sample document for the boolean() check below, which is still disabled.
    xml_source = '<A><B1/><B2/><B3/></A>'
    root = self.etree.XML(xml_source)
    # self.check_select("boolean((A, 35))", root, True)  # Too many arguments
|
||||
|
||||
def test_numerical_expressions2(self):
    # Integer division ('idiv') cases as (expression, expected value) pairs,
    # evaluated in the listed order.
    idiv_cases = (
        ("5 idiv 2", 2),
        ("-3.5 idiv -2", 1),
        ("-3.5 idiv 2", -1),
    )
    for expression, quotient in idiv_cases:
        self.check_value(expression, quotient)
|
||||
|
||||
def test_node_set_functions2(self):
    # count() applied to a single atomic value is expected to give the bare
    # number 1 (not a one-item list).
    xml_source = '<A><B1><C1/><C2/></B1><B2/><B3><C3/><C4/><C5/></B3></A>'
    root = self.etree.XML(xml_source)
    self.check_select("count(5)", root, 1)
|
||||
|
||||
|
||||
class LxmlXPath1ParserTest(XPath1ParserTest):
|
||||
|
||||
|
@ -414,12 +461,17 @@ class LxmlXPath1ParserTest(XPath1ParserTest):
|
|||
cls.etree = lxml.etree
|
||||
|
||||
def check_select(self, path, root, expected, namespaces=None, schema=None):
|
||||
selector = select(root, path, namespaces, schema, self.parser.__class__)
|
||||
if isinstance(expected, type) and issubclass(expected, Exception):
|
||||
self.assertRaises(expected, list, selector)
|
||||
self.assertRaises(expected, select, root, path, namespaces, schema, self.parser.__class__)
|
||||
else:
|
||||
self.assertEqual(list(selector), expected)
|
||||
# self.assertEqual(root.xpath(path), expected) TODO: check some XPath 1.0 peculiarities before ...
|
||||
results = select(root, path, namespaces, schema, self.parser.__class__)
|
||||
if isinstance(expected, set):
|
||||
self.assertEqual(set(results), expected)
|
||||
self.assertEqual(set(root.xpath(path)), expected)
|
||||
else:
|
||||
self.assertEqual(results, expected)
|
||||
self.assertEqual(root.xpath(path), expected)
|
||||
|
||||
|
||||
class LxmlXPath2ParserTest(XPath2ParserTest):
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
|
||||
# All rights reserved.
|
||||
# This file is distributed under the terms of the MIT License.
|
||||
# See the file 'LICENSE' in the root directory of the present
|
||||
# distribution, or http://opensource.org/licenses/MIT.
|
||||
#
|
||||
# @author Davide Brunato <brunato@sissa.it>
|
||||
#
|
||||
import unittest
|
||||
import glob
|
||||
import fileinput
|
||||
import os
|
||||
|
||||
|
||||
class PackageTest(unittest.TestCase):
    """Checks on the package sources: no debugging statement left behind."""

    @classmethod
    def setUpClass(cls):
        cls.test_dir = os.path.dirname(__file__)
        cls.source_dir = os.path.join(cls.test_dir, '../elementpath/')
        # Matches 'import pdb', 'pdb.set_trace()' and calls to 'print('.
        cls.missing_debug_regex = r"(\bimport\s+pdb\b|\bpdb\s*\.\s*set\_trace\(\s*\)|\bprint\s*\()"

    def test_missing_debug_statements(self):
        """Fail on the first source line that matches the debug-statement regex.

        Every ``*.py`` file found directly in ``source_dir`` is scanned. The
        failure message reports the line number within the offending file.
        The test is skipped when no source file is found.
        """
        message = "\nFound a leftover debug statement at line %d of file %r."

        # Sorted so that the first reported failure is deterministic.
        filenames = sorted(glob.glob(os.path.join(self.source_dir, '*.py')))
        if not filenames:
            # With an empty list fileinput.input() falls back to sys.argv[1:]
            # and then to stdin, so it must never be called without files.
            self.skipTest("No Python source files found in %r." % self.source_dir)

        try:
            for line in fileinput.input(filenames):
                # filelineno() is relative to the current file, while lineno()
                # is cumulative over all the files read so far.
                # noinspection PyCompatibility
                self.assertNotRegex(
                    line,
                    self.missing_debug_regex,
                    message % (fileinput.filelineno(), fileinput.filename()),
                )
        finally:
            # Release the module-level fileinput state even when an assertion
            # fails, otherwise a later fileinput.input() call raises an error.
            fileinput.close()
|
||||
|
||||
|
||||
# Run the package checks when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
Loading…
Reference in New Issue