Some improvements to parsers and testing

- Added a test script for testing the package (debugging typos)
  - Added a helper function for nullary operators to the base Parser
  - Comparative tests for XPath 1.0 against lxml.xpath() (double checks)
This commit is contained in:
Davide Brunato 2018-03-02 21:50:03 +01:00
parent c3980243f2
commit 4c8b4d8c8a
9 changed files with 478 additions and 318 deletions

View File

@ -21,7 +21,7 @@ from .exceptions import (
ElementPathError, ElementPathSyntaxError, ElementPathNameError, ElementPathValueError, ElementPathTypeError
)
from .todp_parser import Token, Parser
from .xpath_base import is_etree_element, XPathToken, XPathContext
from .xpath_base import is_etree_element, is_xpath_node, XPathToken, XPathContext
from .xpath1_parser import XPath1Parser
from .xpath2_parser import XPath2Parser
@ -51,12 +51,31 @@ class Selector(object):
def namespaces(self):
return self.parser.namespaces
def findall(self, elem):
def select(self, elem):
context = XPathContext(elem)
return list(self.root_token.select(context))
results = list(self.root_token.select(context))
if len(results) == 1 and self.root_token.label in ('function', 'literal'):
return results[0]
else:
return results
def iter_select(self, elem):
context = XPathContext(elem)
return self.root_token.select(context)
def select(elem, path, namespaces=None, schema=None, parser=XPath2Parser):
    """
    Parse an XPath expression and apply it to an element.

    :param elem: the item used to build the XPathContext for the selection.
    :param path: the XPath expression to parse.
    :param namespaces: passed as first argument to the parser class.
    :param schema: passed as second argument to the parser class.
    :param parser: the parser class to instantiate (XPath2Parser by default).
    :return: the list of selected results; when exactly one result is produced \
    and the root token is labelled 'function' or 'literal', that single result \
    is returned unwrapped instead of a one-item list.
    """
    root_token = parser(namespaces, schema).parse(path)
    results = list(root_token.select(XPathContext(elem)))
    # A lone value of a function call or of a literal is returned as a scalar.
    if len(results) == 1 and root_token.label in ('function', 'literal'):
        return results[0]
    return results
def iter_select(elem, path, namespaces=None, schema=None, parser=XPath2Parser):
parser = parser(namespaces, schema)
root_token = parser.parse(path)
context = XPathContext(elem)

View File

@ -403,6 +403,12 @@ class Parser(object):
return cls.register(symbol, label='literal', lbp=bp, evaluate=evaluate, nud=nud)
@classmethod
def nullary(cls, symbol, bp=0):
    """
    Register a nullary operator for *symbol*.

    The generated nud() takes no operand and simply returns the token itself.

    :param symbol: the symbol handed over to ``cls.register()``.
    :param bp: binding power, forwarded to ``cls.register()`` as ``lbp``.
    :return: the value returned by ``cls.register()``.
    """
    def nud(self):
        return self

    options = dict(label='operator', lbp=bp, nud=nud)
    return cls.register(symbol, **options)
@classmethod
def prefix(cls, symbol, bp=0):
def nud(self):
@ -427,8 +433,6 @@ class Parser(object):
@classmethod
def postfix(cls, symbol, bp=0):
    """
    Register a postfix operator for *symbol*.

    The generated led() stores the left operand as the only child of the
    token and returns the token.

    :param symbol: the symbol handed over to ``cls.register()``.
    :param bp: binding power, forwarded to ``cls.register()`` as both \
    ``lbp`` and ``rbp``.
    :return: the value returned by ``cls.register()``.
    """
    def led(self, left):
        # No debugger hook here: a leftover pdb.set_trace() would halt
        # every parse that meets a postfix operator.
        self[0:] = left,
        return self

    return cls.register(symbol, label='operator', lbp=bp, rbp=bp, led=led)

View File

@ -13,8 +13,9 @@ from __future__ import division
from .exceptions import ElementPathSyntaxError, ElementPathTypeError, ElementPathValueError
from .todp_parser import Parser
from .xpath_base import (
XML_ID_ATTRIBUTE, XPathToken, is_etree_element, is_xpath_node, is_element_node,
is_comment_node, is_processing_instruction_node, is_attribute_node, is_text_node
XML_ID_ATTRIBUTE, XPathToken, qname_to_prefixed, is_etree_element, is_xpath_node,
is_element_node, is_document_node, is_comment_node, is_processing_instruction_node,
is_attribute_node, is_text_node
)
@ -45,11 +46,6 @@ class XPath1Parser(Parser):
'string-length', 'normalize-space', 'translate',
'boolean', 'not', 'true', 'false' # Boolean functions
)
RELATIVE_PATH_SYMBOLS = {'descendant-or-self', 'following-sibling', 'preceding-sibling',
'ancestor-or-self', 'descendant', 'attribute', 'following',
'namespace', 'preceding', 'ancestor', 'parent', 'child', 'self'} | {
'(integer)', '(string)', '(float)', '(decimal)', '(name)', '*', '@', '..', '.', '(', '/'
}
def __init__(self, namespaces=None, schema=None):
super(XPath1Parser, self).__init__()
@ -75,8 +71,7 @@ class XPath1Parser(Parser):
try:
pattern = axis_pattern_template % symbol.strip()
except AttributeError:
pattern = axis_pattern_template % symbol.symbol
pattern = axis_pattern_template % getattr(symbol, 'symbol')
return cls.register(symbol, pattern=pattern, label='axis', lbp=bp, rbp=bp, nud=nud_)
@classmethod
@ -121,12 +116,11 @@ class XPath1Parser(Parser):
self.value = self.evaluate() # Static context evaluation
return self
pattern_template = '\\b%s(?=\s*\\()'
function_pattern_template = '\\b%s(?=\s*\\()'
try:
pattern = pattern_template % symbol.strip()
pattern = function_pattern_template % symbol.strip()
except AttributeError:
pattern = pattern_template % symbol.symbol
pattern = function_pattern_template % getattr(symbol, 'symbol')
return cls.register(symbol, pattern=pattern, label='function', lbp=bp, rbp=bp, nud=nud_)
def map_reference(self, ref):
@ -167,6 +161,7 @@ XPath1Parser.begin()
register = XPath1Parser.register
literal = XPath1Parser.literal
nullary = XPath1Parser.nullary
prefix = XPath1Parser.prefix
infix = XPath1Parser.infix
postfix = XPath1Parser.postfix
@ -192,7 +187,7 @@ literal('(decimal)')
literal('(integer)')
@method('(name)', bp=10)
@method(literal('(name)', bp=10))
def nud(self):
if self.value[0] != '{' and ':' in self.value:
self.value = self.parser.map_reference(self.value)
@ -219,42 +214,6 @@ def select(self, context):
yield context.item
@method(literal('*'))
def select(self, context):
if not self:
# Wildcard literal
if context.active_iterator is None:
for child in context.iter_children():
if is_element_node(child):
yield child
elif context.principal_node_kind:
if is_attribute_node(context.item):
yield context.item[1]
else:
yield context.item
else:
# Product operator
context.item = self[0].evaluate(context)
yield context.item
@method(literal('.'))
def select(self, context):
yield context.item if context.item is not None else context.root
@method(literal('..'))
def select(self, context):
try:
parent = context.parent_map[context.item]
except KeyError:
pass
else:
if is_element_node(parent):
context.item = parent
yield parent
###
# Comments
@method(literal('(:'))
@ -296,6 +255,250 @@ def evaluate(self, context=None):
self.wrong_name('unknown variable')
###
# Nullary operators (use only the context)
@method(nullary('*'))
def select(self, context):
    """
    Select items for the '*' token, used both as wildcard and as product.

    With operands the token acts as the product operator: the first operand
    is evaluated, stored as the context item and yielded. Without operands
    it is a wildcard that depends on the state of the context.
    """
    if self:
        # Product operator
        context.item = self[0].evaluate(context)
        yield context.item
        return

    # Wildcard
    if context.active_iterator is None:
        # No active iterator: yield the element children of the context.
        for child in context.iter_children():
            if is_element_node(child):
                yield child
    elif context.principal_node_kind:
        if is_attribute_node(context.item):
            # Index 1 of an attribute item is yielded
            # (presumably the attribute value -- confirm in xpath_base).
            yield context.item[1]
        else:
            yield context.item
@method(nullary('.'))
def select(self, context):
    """
    Select the context item.

    Yields the current item when there is one; otherwise yields the context
    root, but only if the root is a document node. Nothing is yielded when
    neither condition holds.
    """
    current = context.item
    if current is not None:
        yield current
    elif is_document_node(context.root):
        yield context.root
@method(nullary('..'))
def select(self, context):
    """
    Select the parent of the context item.

    The parent is looked up in ``context.parent_map``. Nothing is yielded
    when the item has no entry in the map or when the parent is not an
    element node; otherwise the parent becomes the context item and is
    yielded.
    """
    try:
        parent = context.parent_map[context.item]
    except KeyError:
        # The item is not registered as a child of anything.
        return

    if is_element_node(parent):
        context.item = parent
        yield parent
###
# Logical Operators
@method(infix('or', bp=20))
def evaluate(self, context=None):
    """Return True if either operand evaluates to a true value (short-circuit)."""
    if self[0].evaluate(context):
        return True
    return bool(self[1].evaluate(context))


@method(infix('and', bp=25))
def evaluate(self, context=None):
    """Return True if both operands evaluate to a true value (short-circuit)."""
    if not self[0].evaluate(context):
        return False
    return bool(self[1].evaluate(context))


@method(infix('=', bp=30))
def evaluate(self, context=None):
    """Compare the evaluated operands with Python's ``==``."""
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left == right


@method(infix('!=', bp=30))
def evaluate(self, context=None):
    """Compare the evaluated operands with Python's ``!=``."""
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left != right


@method(infix('<', bp=30))
def evaluate(self, context=None):
    """Compare the evaluated operands with Python's ``<``."""
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left < right


@method(infix('>', bp=30))
def evaluate(self, context=None):
    """Compare the evaluated operands with Python's ``>``."""
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left > right


@method(infix('<=', bp=30))
def evaluate(self, context=None):
    """Compare the evaluated operands with Python's ``<=``."""
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left <= right


@method(infix('>=', bp=30))
def evaluate(self, context=None):
    """Compare the evaluated operands with Python's ``>=``."""
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left >= right
###
# Numerical operators
prefix('+')
prefix('-', bp=90)
@method(infix('+', bp=40))
def evaluate(self, context=None):
    """
    Evaluate the '+' token, either as binary addition or as unary plus.

    With more than one operand the two evaluated operands are added;
    otherwise unary plus is applied to the single operand.

    :param context: the dynamic context forwarded to the operands.
    :raises ElementPathTypeError: if the operation raises a TypeError.
    """
    if len(self) > 1:
        try:
            return self[0].evaluate(context) + self[1].evaluate(context)
        except TypeError:
            # Binary form: both operands are involved, so report all of them.
            raise ElementPathTypeError("numeric values are required: %r." % (self[:],))
    else:
        try:
            return +self[0].evaluate(context)
        except TypeError:
            # Unary form: only one operand exists.
            raise ElementPathTypeError("a numeric value is required: %r." % (self[0],))
@method(infix('-', bp=40))
def evaluate(self, context=None):
    """
    Evaluate the '-' token, either as binary subtraction or as unary minus.

    The binary form is tried first; an IndexError (raised when the second
    operand is missing) switches to the unary form. A TypeError in either
    form is reported through ``self.wrong_type()``.

    :param context: the dynamic context forwarded to the operands.
    """
    try:
        try:
            minuend = self[0].evaluate(context)
            subtrahend = self[1].evaluate(context)
            return minuend - subtrahend
        except TypeError:
            operands = [tk.evaluate(context) for tk in self]
            self.wrong_type("values must be numeric: %r" % operands)
    except IndexError:
        # Only one operand: unary minus.
        try:
            return -self[0].evaluate(context)
        except TypeError:
            self.wrong_type("value must be numeric: %r" % self[0].evaluate(context))
@method(infix('*', bp=45))
def evaluate(self, context=None):
    """
    Multiply the two evaluated operands.

    Returns None when the token has no operands (the same symbol is also
    registered as a nullary token).
    """
    if not self:
        return None
    left = self[0].evaluate(context)
    right = self[1].evaluate(context)
    return left * right


@method(infix('div', bp=45))
def evaluate(self, context=None):
    """Divide the first evaluated operand by the second using ``/``."""
    dividend = self[0].evaluate(context)
    divisor = self[1].evaluate(context)
    return dividend / divisor


@method(infix('mod', bp=45))
def evaluate(self, context=None):
    """Return the first evaluated operand modulo the second using ``%``."""
    dividend = self[0].evaluate(context)
    divisor = self[1].evaluate(context)
    return dividend % divisor
###
# Union expressions
@method(infix('|', bp=50))
def select(self, context):
    """
    Select the union of the results of the two operands.

    The results of both operands are passed through ``self.filter_node()``
    and collected in a set; the members of that set are then yielded in the
    order in which ``self.root.iter()`` produces them, each one also being
    stored as the context item.
    """
    results = set()
    for k in range(2):
        for elem in self[k].select(context):
            results.add(self.filter_node(elem))

    # NOTE(review): the traversal uses the token's own ``root`` attribute,
    # not ``context.root`` -- confirm that this is the intended tree.
    for elem in self.root.iter():
        if elem in results:
            context.item = elem
            yield elem
###
# Path expressions
@method('//', bp=80)
@method('/', bp=80)
def nud(self):
    """
    Parse a path expression that starts with '/' or '//'.

    The symbol is rejected when ``parser.source_first`` is false. A lone '/'
    followed by the end of the source is returned without operands. In any
    other case the next token must be a path step token, and the expression
    parsed with a right binding power of 80 becomes the only operand.
    """
    parser = self.parser
    next_token = parser.next_token
    if not parser.source_first:
        self.wrong_symbol()
    elif next_token.symbol == '(end)' and self.symbol == '/':
        return self
    elif not next_token.is_path_step_token():
        next_token.wrong_symbol()

    self[0:] = (parser.expression(80),)
    return self
@method('//', bp=80)
@method('/', bp=80)
def led(self, left):
    """
    Parse a '/' or '//' that follows a left operand.

    The next token must be a path step token. The left operand and the
    expression parsed with a right binding power of 80 become the two
    operands of this token.
    """
    parser = self.parser
    if not parser.next_token.is_path_step_token():
        parser.next_token.wrong_symbol()

    self[0:1] = (left, parser.expression(80))
    return self
@method('/')
def select(self, context):
    """
    Select the results of a '/' path expression.

    - No operands: yields the context root, only if it is a document node.
    - One operand: resets the context item to None and yields the results
      of the operand.
    - Two operands: for each element selected by the left operand, the right
      operand is applied on a copy of the context positioned on that
      element. Elements and tuples are yielded only once; other results are
      yielded as they come.
    """
    if not self:
        if is_document_node(context.root):
            yield context.root
    elif len(self) == 1:
        context.item = None
        for result in self[0].select(context):
            yield result
    else:
        seen = set()
        for elem in self[0].select(context):
            if not is_element_node(elem):
                self.wrong_type("left operand must returns element nodes: %r" % elem)
            for result in self[1].select(context.copy(item=elem)):
                if not (is_etree_element(result) or isinstance(result, tuple)):
                    # Not a node-like result: no duplicate filtering.
                    yield result
                elif result not in seen:
                    yield result
                    seen.add(result)
@method('//')
def select(self, context):
    """
    Select the results of a '//' path expression.

    With one operand, the operand is applied at every step of
    ``context.iter_descendants()``. With two operands, the same is done for
    the descendants of each element selected by the left operand.
    """
    if len(self) == 1:
        for _ in context.iter_descendants():
            for result in self[0].select(context):
                yield result
        return

    for elem in self[0].select(context):
        if not is_element_node(elem):
            self.wrong_type("left operand must returns element nodes: %r" % elem)
        for _ in context.iter_descendants(item=elem):
            for result in self[1].select(context):
                yield result
###
# Parenthesized expressions
@method('(', bp=90)
def nud(self):
    """
    Parse a parenthesized expression.

    An immediately following ')' is flagged as unexpected. The inner
    expression is parsed, the closing ')' is consumed and the inner
    expression token is returned in place of the parenthesis token.
    """
    parser = self.parser
    parser.next_token.unexpected(')')
    self[0:] = (parser.expression(),)
    parser.advance(')')
    return self[0]
###
# Predicate filters
@method('[', bp=90)
def led(self, left):
    """
    Parse a predicate that follows a left operand.

    An immediately following ']' is flagged as unexpected. The left operand
    and the parsed predicate expression become the two operands of this
    token, then the closing ']' is consumed.
    """
    parser = self.parser
    parser.next_token.unexpected(']')
    self[0:1] = (left, parser.expression())
    parser.advance(']')
    return self
@method('[')
def select(self, context):
    """
    Filter the results of the left operand with the predicate.

    For each result the predicate is selected on a copy of the context. A
    predicate made of a single non-boolean number is matched against
    ``context.position`` (the number minus one); any other predicate is
    judged by ``self.boolean()``. Matching results become the context item
    and are yielded.
    """
    for result in self[0].select(context):
        predicate = list(self[1].select(context.copy()))
        is_positional = (
            len(predicate) == 1
            and isinstance(predicate[0], (int, float))
            and not isinstance(predicate[0], bool)
        )
        if is_positional:
            matched = context.position == predicate[0] - 1
        else:
            matched = self.boolean(predicate)

        if matched:
            context.item = result
            yield result
###
# Forward Axes
@method(axis('self', bp=80))
@ -331,18 +534,14 @@ def select(self, context):
@method(axis('following-sibling', bp=80))
def select(self, context):
if is_element_node(context.item):
elem = context.item
try:
parent = context.parent_map[elem]
except KeyError:
return
else:
item = context.item
for _ in context.iter_parent():
follows = False
for item in context.iter_children(item=parent):
for child in context.iter_children():
if follows:
for result in self[0].select(context):
yield result
elif item is elem:
elif item is child:
follows = True
@ -361,7 +560,6 @@ def select(self, context):
@method('@', bp=80)
@method('attribute', bp=80)
def nud(self):
self[0:] = self.parser.expression(rbp=80),
if self[0].symbol not in ('*', '(name)'):
@ -370,7 +568,7 @@ def nud(self):
@method('@')
@method(axis('attribute'))
@method(axis('attribute', bp=80))
def select(self, context):
for _ in context.iter_attributes():
for result in self[0].select(context):
@ -381,7 +579,7 @@ def select(self, context):
def select(self, context):
if is_element_node(context.item):
element_class = context.item.__class__
for prefix_, uri in sorted(self.parser.namespaces.items()):
for prefix_, uri in self.parser.namespaces.items():
context.item = element_class(tag=prefix_, text=uri)
yield context.item
@ -390,12 +588,8 @@ def select(self, context):
# Reverse Axes
@method(axis('parent', bp=80))
def select(self, context):
try:
parent = context.parent_map[context.item]
except KeyError:
pass
else:
for result in self[0].select(context.copy(item=parent)):
for _ in context.iter_parent():
for result in self[0].select(context):
yield result
@ -419,17 +613,15 @@ def select(self, context):
@method(axis('preceding-sibling', bp=80))
def select(self, context):
if is_element_node(context.item):
elem = context.item
try:
parent = context.parent_map[elem]
except KeyError:
pass
else:
item = context.item
for parent in context.iter_parent():
for child in parent:
if child is elem:
if child is item:
break
context.item = child
yield child
else:
context.item = child
for result in self[0].select(context):
yield result
@method(axis('preceding', bp=80))
@ -528,30 +720,38 @@ def select(self, context):
@method(function('name', nargs=(0, 1), bp=90))
def evaluate(self, context=None):
try:
return self.name(self[0].evaluate(context))
except IndexError:
if context is None:
self.missing_context()
return self.name(context.item)
@method(function('local-name', nargs=(0, 1), bp=90))
@method(function('namespace-uri', nargs=(0, 1), bp=90))
def evaluate(self, context=None):
try:
name = self.name(self[0])
except IndexError:
if context is None:
self.missing_context()
if context is None:
return
elif not self:
name = self.name(context.item)
if name[0] != '{':
return name
else:
try:
selector = iter(self[0].select(context))
item = next(selector)
except StopIteration:
name = ''
else:
name = self.name(item)
if self.parser.version > '1.0':
try:
next(selector)
except StopIteration:
pass
else:
self.wrong_value("a sequence of more than one item is not allowed as argument")
symbol = self.symbol
if symbol == 'name':
return qname_to_prefixed(name, self.parser.namespaces)
elif not name or name[0] != '{':
return name if symbol == 'local-name' else ''
elif symbol == 'local-name':
return name.split('}')[1]
function('namespace-uri', nargs=1, bp=90)
elif symbol == 'namespace-uri':
return name.split('}')[0][1:]
###
@ -677,205 +877,4 @@ def evaluate(self, context=None):
return False
###
# Logical Operators
@method(infix('or', bp=20))
def evaluate(self, context=None):
return bool(self[0].evaluate(context) or self[1].evaluate(context))
@method(infix('and', bp=25))
def evaluate(self, context=None):
return bool(self[0].evaluate(context) and self[1].evaluate(context))
@method(infix('=', bp=30))
def evaluate(self, context=None):
return self[0].evaluate(context) == self[1].evaluate(context)
@method(infix('!=', bp=30))
def evaluate(self, context=None):
return self[0].evaluate(context) != self[1].evaluate(context)
@method(infix('<', bp=30))
def evaluate(self, context=None):
return self[0].evaluate(context) < self[1].evaluate(context)
@method(infix('>', bp=30))
def evaluate(self, context=None):
return self[0].evaluate(context) > self[1].evaluate(context)
@method(infix('<=', bp=30))
def evaluate(self, context=None):
return self[0].evaluate(context) <= self[1].evaluate(context)
@method(infix('>=', bp=30))
def evaluate(self, context=None):
return self[0].evaluate(context) >= self[1].evaluate(context)
###
# Numerical operators
prefix('+')
prefix('-', bp=90)
@method(infix('+', bp=40))
def evaluate(self, context=None):
if len(self) > 1:
try:
return self[0].evaluate(context) + self[1].evaluate(context)
except TypeError:
raise ElementPathTypeError("a numeric value is required: %r." % self[0])
else:
try:
return +self[0].evaluate(context)
except TypeError:
raise ElementPathTypeError("numeric values are required: %r." % self[:])
@method(infix('-', bp=40))
def evaluate(self, context=None):
try:
try:
return self[0].evaluate(context) - self[1].evaluate(context)
except TypeError:
self.wrong_type("values must be numeric: %r" % [tk.evaluate(context) for tk in self])
except IndexError:
try:
return -self[0].evaluate(context)
except TypeError:
self.wrong_type("value must be numeric: %r" % self[0].evaluate(context))
@method(infix('*', bp=45))
def evaluate(self, context=None):
if self:
return self[0].evaluate(context) * self[1].evaluate(context)
@method(infix('div', bp=45))
def evaluate(self, context=None):
return self[0].evaluate(context) / self[1].evaluate(context)
@method(infix('mod', bp=45))
def evaluate(self, context=None):
return self[0].evaluate(context) % self[1].evaluate(context)
###
# Union expressions
@method(infix('|', bp=50))
def select(self, context):
results = {self.filter_node(elem) for k in range(2) for elem in self[k].select(context)}
for elem in self.root.iter():
if elem in results:
context.item = elem
yield elem
###
# Path expressions
@method('//', bp=80)
@method('/', bp=80)
def nud(self):
if not self.parser.source_first:
self.wrong_symbol()
elif self.parser.next_token.symbol == '(end)' and self.symbol == '/':
return self
elif self.parser.next_token.symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
self.parser.next_token.wrong_symbol()
self[0:] = self.parser.expression(80),
return self
@method('//', bp=80)
@method('/', bp=80)
def led(self, left):
if self.parser.next_token.symbol not in self.parser.RELATIVE_PATH_SYMBOLS:
self.parser.next_token.wrong_symbol()
self[0:1] = left, self.parser.expression(80)
return self
@method('/')
def select(self, context):
"""
Child path expression. Selects child:: axis as default (when bind to '*' or '(name)').
"""
if not self:
yield context.root
elif len(self) == 1:
context.item = None
for result in self[0].select(context):
yield result
else:
items = set()
for elem in self[0].select(context):
if not is_element_node(elem):
self.wrong_type("left operand must returns element nodes: %r" % elem)
for result in self[1].select(context.copy(item=elem)):
if is_etree_element(result) or isinstance(result, tuple):
if result not in items:
yield result
items.add(result)
else:
yield result
@method('//')
def select(self, context):
if len(self) == 1:
for _ in context.iter_descendants():
for result in self[0].select(context):
yield result
else:
for elem in self[0].select(context):
if not is_element_node(elem):
self.wrong_type("left operand must returns element nodes: %r" % elem)
for _ in context.iter_descendants(item=elem):
for result in self[1].select(context):
yield result
###
# Parenthesized expressions
@method('(', bp=90)
def nud(self):
self.parser.next_token.unexpected(')')
self[0:] = self.parser.expression(),
self.parser.advance(')')
return self[0]
###
# Predicate filters
@method('[', bp=90)
def led(self, left):
self.parser.next_token.unexpected(']')
self[0:1] = left, self.parser.expression()
self.parser.advance(']')
return self
@method('[')
def select(self, context):
for result in self[0].select(context):
predicate = list(self[1].select(context.copy()))
if len(predicate) == 1 and not isinstance(predicate[0], bool) and \
isinstance(predicate[0], (int, float)):
if context.position == predicate[0] - 1:
context.item = result
yield result
elif self.boolean(predicate):
context.item = result
yield result
XPath1Parser.end()

View File

@ -29,7 +29,6 @@ class XPath2Parser(XPath1Parser):
# XPath 2.0 added functions
'document-node', # Node test functions
)
RELATIVE_PATH_SYMBOLS = XPath1Parser.RELATIVE_PATH_SYMBOLS | {s for s in SYMBOLS if s.endswith("::")}
RESERVED_FUNCTIONS = {
'attribute(', 'comment(', 'document-node(', 'element(', 'empty-sequence(', 'if', 'item', 'node(',

View File

@ -11,12 +11,42 @@
import sys
from .exceptions import ElementPathTypeError, ElementPathValueError
from .todp_parser import Token
import re
# Matches a leading '{uri}' part and captures the URI.
_RE_MATCH_NAMESPACE = re.compile(r'{([^}]*)}')
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
XML_ID_ATTRIBUTE = '{%s}id' % XML_NAMESPACE


def get_namespace(name):
    """
    Return the namespace URI of a name written in the '{uri}local' form.

    :param name: the name to inspect.
    :return: the URI found between the leading braces, or an empty string \
    when the name does not start with a '{uri}' part or is not a string.
    """
    try:
        match = _RE_MATCH_NAMESPACE.match(name)
    except TypeError:
        return ''
    return '' if match is None else match.group(1)


def qname_to_prefixed(qname, namespaces):
    """
    Convert a fully qualified name into a prefixed one using a namespace map.

    Prefixes are examined in reverse sorted order and the first one bound to
    the namespace of the name is used; an empty prefix just drops the
    '{uri}' part.

    :param qname: a fully qualified name or a local name.
    :param namespaces: dictionary that maps prefixes to namespace URIs.
    :return: string with a prefixed or local reference; the name itself \
    if no prefix is bound to its namespace.
    """
    qname_uri = get_namespace(qname)
    for prefix, uri in sorted(namespaces.items(), reverse=True):
        if uri == qname_uri:
            replacement = u'%s:' % prefix if prefix else ''
            return qname.replace(u'{%s}' % uri, replacement)
    return qname
###
# XPath node types test functions
#
@ -130,6 +160,11 @@ class XPathToken(Token):
else:
self.wrong_type("an XPath node required: %r" % value)
def is_path_step_token(self):
    """
    Tell whether this token is accepted as a path step.

    :return: True for tokens labelled 'axis' and for tokens whose symbol is \
    one of the listed literals, wildcards, abbreviations or openers.
    """
    if self.label == 'axis':
        return True
    return self.symbol in {
        '(integer)', '(string)', '(float)', '(decimal)', '(name)', '*', '@', '..', '.', '(', '/'
    }
# Errors
def missing_context(self):
raise ElementPathValueError("%s: dynamic context required for evaluate." % self)
@ -214,6 +249,19 @@ class XPathContext(object):
self.item, self.size, self.position, self._iterator = status
self._node_kind_test = is_element_node
def iter_parent(self):
    """
    Move the context to the parent of the current item and yield it.

    The parent is looked up in ``self.parent_map``; nothing is yielded when
    the item has no entry. The item, size, position and iterator saved at
    the start are put back once the generator is resumed after the yield
    (or immediately, when there is no parent).
    """
    saved = (self.item, self.size, self.position, self._iterator)
    self._iterator = self.iter_parent
    self._node_kind_test = is_element_node

    try:
        parent = self.parent_map[self.item]
    except KeyError:
        pass
    else:
        self.item = parent
        yield parent

    self.item, self.size, self.position, self._iterator = saved
def iter_descendants(self, item=None):
def _iter_descendants():
elem = self.item

0
tests/__init__.py Normal file
View File

View File

@ -59,11 +59,14 @@ class XPath1ParserTest(unittest.TestCase):
self.assertEqual(list(self.parser.parse(path).select(context)), expected)
def check_select(self, path, root, expected, namespaces=None, schema=None):
selector = select(root, path, namespaces, schema, self.parser.__class__)
if isinstance(expected, type) and issubclass(expected, Exception):
self.assertRaises(expected, list, selector)
self.assertRaises(expected, select, root, path, namespaces, schema, self.parser.__class__)
else:
self.assertEqual(list(selector), expected)
results = select(root, path, namespaces, schema, self.parser.__class__)
if isinstance(expected, set):
self.assertEqual(set(results), expected)
else:
self.assertEqual(results, expected)
def wrong_syntax(self, path):
self.assertRaises(ElementPathSyntaxError, self.parser.parse, path)
@ -114,11 +117,37 @@ class XPath1ParserTest(unittest.TestCase):
self.check_tokenizer("last (:", ['last', '', '(:'])
def test_tokens(self):
# Literals
self.check_token('(string)', 'literal', "'hello' string",
"token(symbol='(string)', value='hello')", 'hello')
self.check_token('(integer)', 'literal', "1999 integer",
"token(symbol='(integer)', value=1999)", 1999)
self.check_token('(float)', 'literal', "3.1415 float",
"token(symbol='(float)', value=3.1415)", 3.1415)
self.check_token('(decimal)', 'literal', "217.35 decimal",
"token(symbol='(decimal)', value=217.35)", 217.35)
self.check_token('(name)', 'literal', "'schema' name",
"token(symbol='(name)', value='schema')", 'schema')
# Axes
self.check_token('self', 'axis', "self axis", "token(symbol='self')")
self.check_token('child', 'axis', "child axis", "token(symbol='child')")
self.check_token('parent', 'axis', "parent axis", "token(symbol='parent')")
self.check_token('ancestor', 'axis', "ancestor axis", "token(symbol='ancestor')")
self.check_token('preceding', 'axis', "preceding axis", "token(symbol='preceding')")
self.check_token('descendant-or-self', 'axis', "descendant-or-self axis")
self.check_token('following-sibling', 'axis', "following-sibling axis")
self.check_token('preceding-sibling', 'axis', "preceding-sibling axis")
self.check_token('ancestor-or-self', 'axis', "ancestor-or-self axis")
self.check_token('descendant', 'axis', "descendant axis")
self.check_token('attribute', 'axis', "attribute axis")
self.check_token('following', 'axis', "following axis")
self.check_token('namespace', 'axis', "namespace axis")
# Functions
self.check_token('position', 'function', "position() function", "token(symbol='position')")
# Operators
self.check_token('and', 'operator', "'and' operator", "token(symbol='and')")
def test_implementation(self):
@ -128,7 +157,7 @@ class XPath1ParserTest(unittest.TestCase):
self.check_tree('child::B1', '(child (B1))')
self.check_tree('A/B//C/D', '(/ (// (/ (A) (B)) (C)) (D))')
self.check_tree('child::*/child::B1', '(/ (child (*)) (child (B1)))')
self.check_tree('attribute::name="Galileo"', '(attribute (= (name) (Galileo)))')
self.check_tree('attribute::name="Galileo"', '(= (attribute (name)) (Galileo))')
self.check_tree('1 + 2 * 3', '(+ (1) (* (2) (3)))')
self.check_tree('(1 + 2) * 3', '(* (+ (1) (2)) (3))')
self.check_tree("false() and true()", '(and (False) (True))')
@ -183,9 +212,23 @@ class XPath1ParserTest(unittest.TestCase):
self.check_value("last()", 0)
self.check_value("last()", 3, context=context)
self.check_value("last()-1", 2, context=context)
self.check_value("count((0, 1, 2 + 1, 3 - 1))", 4)
self.check_select("count(5)", root, [1])
# self.check_select("name(A)", root, [1])
self.check_select("name(.)", root, 'A')
self.check_select("name(A)", root, '')
self.check_select("local-name(A)", root, '')
self.check_select("namespace-uri(A)", root, '')
self.check_select("name(B2)", root, 'B2')
self.check_select("local-name(B2)", root, 'B2')
self.check_select("namespace-uri(B2)", root, '')
if self.parser.version <= '1.0':
self.check_select("name(*)", root, 'B1')
root = self.etree.XML('<tst:A xmlns:tst="http://xpath.test/ns"/>')
self.check_select("name(.)", root, 'tst:A', namespaces={'tst': "http://xpath.test/ns"})
self.check_select("local-name(.)", root, 'A')
self.check_select("namespace-uri(.)", root, 'http://xpath.test/ns')
# self.check_select("name(tst:A)", root, ['{http://xpath.test/ns}A'])
def test_string_functions(self):
self.check_value("string(10.0)", '10.0')
@ -267,7 +310,7 @@ class XPath1ParserTest(unittest.TestCase):
def test_child_operator(self):
root = self.etree.XML('<A><B1><C1/></B1><B2/><B3><C1/><C2/></B3></A>')
self.check_select('/', root, [root]) # root element is the child of the document
self.check_select('/', root, []) # a root element is not a document!
self.check_select('/B1', root, [])
self.check_select('/A1', root, [])
self.check_select('/A', root, [root])
@ -284,7 +327,7 @@ class XPath1ParserTest(unittest.TestCase):
def test_context_item_expression(self):
root = self.etree.XML('<A><B1><C/></B1><B2/><B3><C1/><C2/></B3></A>')
self.check_select('.', root, [root])
self.check_select('/././.', root, [root])
self.check_select('/././.', root, [])
self.check_select('/A/.', root, [root])
self.check_select('/A/B1/.', root, [root[0]])
self.check_select('/A/B1/././.', root, [root[0]])
@ -333,7 +376,7 @@ class XPath1ParserTest(unittest.TestCase):
self.check_select('/A/B1/attribute::*', root, ['beta1'])
self.check_select('/A/B1/@*', root, ['beta1'])
self.check_select('/A/B3/attribute::*', root, ['beta2', 'beta3'])
self.check_select('/A/attribute::*', root, ['alpha', '1']) # sorted by attribute name
self.check_select('/A/attribute::*', root, {'1', 'alpha'})
def test_following_axis(self):
root = self.etree.XML('<A><B1><C1/></B1><B2/><B3><C1/><C2/></B3><B4><C1><D1/></C1></B4></A>')
@ -363,7 +406,7 @@ class XPath1ParserTest(unittest.TestCase):
root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2></A>')
self.check_select('/A/B1/C2/preceding-sibling::*', root, [root[0][0]])
self.check_select('/A/B2/C4/preceding-sibling::*', root, [root[1][0], root[1][1], root[1][2]])
self.check_select('/A/B1/C2/preceding-sibling::C3', root, [root[0][0]])
self.check_select('/A/B1/C2/preceding-sibling::C3', root, [])
def test_preceding_axis(self):
root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2></A>')
@ -398,13 +441,17 @@ class XPath2ParserTest(XPath1ParserTest):
def test_boolean_functions2(self):
root = self.etree.XML('<A><B1/><B2/><B3/></A>')
#self.check_select("boolean((A, 35))", root, True) # Too much arguments
# self.check_select("boolean((A, 35))", root, True) # Too much arguments
def test_numerical_expressions2(self):
self.check_value("5 idiv 2", 2)
self.check_value("-3.5 idiv -2", 1)
self.check_value("-3.5 idiv 2", -1)
def test_node_set_functions2(self):
root = self.etree.XML('<A><B1><C1/><C2/></B1><B2/><B3><C3/><C4/><C5/></B3></A>')
self.check_select("count(5)", root, 1)
class LxmlXPath1ParserTest(XPath1ParserTest):
@ -414,12 +461,17 @@ class LxmlXPath1ParserTest(XPath1ParserTest):
cls.etree = lxml.etree
def check_select(self, path, root, expected, namespaces=None, schema=None):
selector = select(root, path, namespaces, schema, self.parser.__class__)
if isinstance(expected, type) and issubclass(expected, Exception):
self.assertRaises(expected, list, selector)
self.assertRaises(expected, select, root, path, namespaces, schema, self.parser.__class__)
else:
self.assertEqual(list(selector), expected)
# self.assertEqual(root.xpath(path), expected) TODO: check some XPath 1.0 peculiarities before ...
results = select(root, path, namespaces, schema, self.parser.__class__)
if isinstance(expected, set):
self.assertEqual(set(results), expected)
self.assertEqual(set(root.xpath(path)), expected)
else:
self.assertEqual(results, expected)
self.assertEqual(root.xpath(path), expected)
class LxmlXPath2ParserTest(XPath2ParserTest):

39
tests/test_package.py Normal file
View File

@ -0,0 +1,39 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c), 2018, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
import unittest
import glob
import fileinput
import os
class PackageTest(unittest.TestCase):
    """Sanity checks on the sources of the package."""

    @classmethod
    def setUpClass(cls):
        cls.test_dir = os.path.dirname(__file__)
        cls.source_dir = os.path.join(cls.test_dir, '../elementpath/')
        # Matches 'import pdb', 'pdb.set_trace()' and calls to print().
        cls.missing_debug_regex = r"(\bimport\s+pdb\b|\bpdb\s*\.\s*set\_trace\(\s*\)|\bprint\s*\()"

    def test_missing_debug_statements(self):
        """Fail if a source file of the package still has a debug statement."""
        import re  # local import: 're' is not among the module-level imports

        message = "\nFound a debug missing statement at line %d of file %r."
        pattern = re.compile(self.missing_debug_regex)

        filenames = glob.glob(os.path.join(self.source_dir, '*.py'))
        if not filenames:
            # With an empty list fileinput would read from standard input.
            self.skipTest("no source files found in %r" % self.source_dir)

        try:
            for line in fileinput.input(filenames):
                if pattern.search(line) is not None:
                    # filelineno() is relative to the current file, while
                    # lineno() is cumulative over all the files read so far.
                    self.fail(message % (fileinput.filelineno(), fileinput.filename()))
        finally:
            # Release the module-level fileinput state even after a failure.
            fileinput.close()
if __name__ == '__main__':
unittest.main()

View File

@ -8,4 +8,4 @@ envlist = py27, py33, py34, py35, py36
[testenv]
deps = lxml
commands = python test_elementpath.py
commands = python tests/test_elementpath.py