Improve node sequence operators

- Add is_root class attribute to XPathToken
  - '(name)' token yields AttributeNode objects for non-root tokens
This commit is contained in:
Davide Brunato 2019-09-27 18:16:21 +02:00
parent 93c6521a23
commit c367bdfd95
7 changed files with 67 additions and 98 deletions

View File

@ -198,6 +198,7 @@ class XPath1Parser(Parser):
def parse(self, source):
root_token = super(XPath1Parser, self).parse(source)
root_token.is_root = True
try:
root_token.evaluate() # Static context evaluation
except MissingContextError:
@ -233,56 +234,18 @@ literal('(string)')
literal('(float)')
literal('(decimal)')
literal('(integer)')
literal('(name)', bp=10)
@method('(name)')
@method(literal('(name)', bp=10))
def evaluate(self, context=None):
if context is None:
return
name = self.value
if name[0] != '{' and self.parser.default_namespace:
tag = u'{%s}%s' % (self.parser.default_namespace, name)
else:
tag = name
if isinstance(context, XPathSchemaContext):
# Bind with the XSD type
xsd_type = self.match_xsd_type(context.item, name)
if xsd_type is not None:
if isinstance(context.item, AttributeNode):
primitive_type = self.parser.schema.get_primitive_type(xsd_type)
return XSD_BUILTIN_TYPES[primitive_type.local_name].value
else:
return context.item
elif self.xsd_type is None:
# Untyped evaluation
if is_attribute_node(context.item, name):
return context.item[1]
elif is_element_node(context.item, tag):
return context.item
else:
# XSD typed evaluation
try:
if is_attribute_node(context.item, name):
return self.xsd_type.decode(context.item[1])
elif is_element_node(context.item, tag):
if self.xsd_type.is_simple():
return self.xsd_type.decode(context.item.text)
elif self.xsd_type.has_simple_content():
self.xsd_type.decode(context.item.text)
return context.item
except (TypeError, ValueError):
msg = "Type {!r} is not appropriate for the context item {!r}"
self.wrong_context_type(msg.format(self.xsd_type, context.item))
return [x for x in self.select(context)] or None
@method('(name)')
def select(self, context=None):
if context is None:
return
name = self.value
if name[0] != '{' and self.parser.default_namespace:
tag = u'{%s}%s' % (self.parser.default_namespace, name)
@ -304,7 +267,7 @@ def select(self, context=None):
# Untyped selection
for item in context.iter_children_or_self():
if is_attribute_node(item, name):
yield item[1]
yield item
elif is_element_node(item, tag):
yield item
else:
@ -312,7 +275,7 @@ def select(self, context=None):
for item in context.iter_children_or_self():
try:
if is_attribute_node(item, name):
yield self.xsd_type.decode(item[1])
yield AttributeNode(item[0], self.xsd_type.decode(item[1]))
elif is_element_node(item, tag):
if self.xsd_type.is_simple():
yield self.xsd_type.decode(item.text)
@ -652,15 +615,27 @@ def evaluate(self, context=None):
###
# Union expressions
@method(infix('|', bp=50))
@method('|', bp=50)
def led(self, left):
self.cut_and_sort = True
if left.symbol in {'|', 'union'}:
left.cut_and_sort = False
self[:] = left, self.parser.expression(rbp=50)
return self
@method('|')
def select(self, context=None):
if context is not None:
results = {item for k in range(2) for item in self[k].select(context.copy())}
for item in context.iter():
if item in results:
if context is None:
return
elif not self.cut_and_sort:
for k in range(2):
for item in self[k].select(context.copy()):
yield item
elif is_attribute_node(item) and item[1] in results:
yield item[1]
else:
results = {item for k in range(2) for item in self[k].select(context.copy())}
for item in context.iter_results(results, self.is_root):
yield item
###
@ -705,8 +680,8 @@ def select(self, context=None):
left_results = list(self[0].select(context))
context.size = len(left_results)
for context.position, context.item in enumerate(left_results):
if not is_element_node(context.item):
self.wrong_type("left operand must returns element nodes: {}".format(context.item))
if not is_xpath_node(context.item):
self.wrong_type("left operand must returns XPath nodes: {}".format(context.item))
for result in self[1].select(context):
if is_etree_element(result) or isinstance(result, tuple):
if result not in items:
@ -878,7 +853,7 @@ def select(self, context=None):
for _ in context.iter_attributes():
for result in self[0].select(context):
yield result
yield result[1] if self.is_root else result
@method(axis('namespace'))

View File

@ -400,9 +400,9 @@ def select(self, context=None):
for result in self[0].select(context):
yield result
else:
attribute_name = self[0].evaluate(context) if self else None
name = self[0].evaluate(context) if self else None
for item in context.iter_attributes():
if is_attribute_node(item, attribute_name):
if is_attribute_node(item, name):
yield context.item[1]

View File

@ -646,7 +646,7 @@ def evaluate(self, context=None):
@method(function('string-join', nargs=2))
def evaluate(self, context=None):
items = [self.string_value(s) if is_element_node(s) else s
items = [self.string_value(s) if is_element_node(s) or is_attribute_node(s) else s
for s in self[0].select(context)]
try:
return self.get_argument(context, 1, cls=string_base_type).join(items)

View File

@ -339,6 +339,7 @@ class XPath2Parser(XPath1Parser):
def parse(self, source):
root_token = super(XPath1Parser, self).parse(source)
root_token.is_root = True
if self.schema is None:
try:
@ -381,34 +382,25 @@ register('?')
register('(:')
register(':)')
###
# Node sequence composition
@method(infix('union', bp=50))
def select(self, context=None):
if context is not None:
results = {item for k in range(2) for item in self[k].select(context.copy())}
for item in context.iter():
if item in results:
yield item
XPath2Parser.duplicate('|', 'union')
@method(infix('intersect', bp=55))
def select(self, context=None):
if context is not None:
results = set(self[0].select(context.copy())) & set(self[1].select(context.copy()))
for item in context.iter():
if item in results:
yield item
for item in context.iter_results(results, self.is_root):
yield item
@method(infix('except', bp=55))
def select(self, context=None):
if context is not None:
results = set(self[0].select(context.copy())) - set(self[1].select(context.copy()))
for item in context.iter():
if item in results:
yield item
for item in context.iter_results(results, self.is_root):
yield item
###

View File

@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types.
"""
from collections import namedtuple
from .compat import PY3, urlparse
from .compat import PY3, urlparse, unicode_type
from .namespaces import XML_BASE, XSI_NIL
from .exceptions import ElementPathValueError
from .datatypes import ncname_validator
@ -23,6 +23,9 @@ from .datatypes import ncname_validator
AttributeNode = namedtuple('Attribute', 'name value')
"""A namedtuple-based type to represent XPath attributes."""
ElementNode = namedtuple('Element', 'tag text attrib')
"""A namedtuple-based type to represent XPath element simple and simple-content nodes."""
NamespaceNode = namedtuple('Namespace', 'prefix uri')
"""A namedtuple-based type to represent XPath namespaces."""
@ -34,11 +37,15 @@ def is_etree_element(obj):
def elem_iter_strings(elem):
for e in elem.iter():
if e.text is not None:
yield e.text
if e.tail is not None and e is not elem:
yield e.tail
if isinstance(elem, ElementNode):
if elem.text is not None:
yield unicode_type(elem.text)
else:
for e in elem.iter():
if e.text is not None:
yield e.text
if e.tail is not None and e is not elem:
yield e.tail
###
@ -51,7 +58,7 @@ def elem_iter_strings(elem):
# Element-like objects are used for representing elements and comments, ElementTree-like objects
# for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
###
def is_element_node(obj, tag=None, default_namespace=None):
def is_element_node(obj, tag=None):
"""
Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
Raises a ValueError if the argument tag has to be used but it's in a wrong format.

View File

@ -24,7 +24,7 @@ import locale
import contextlib
from decimal import Decimal
from .compat import string_base_type
from .compat import string_base_type, unicode_type
from .exceptions import xpath_error
from .namespaces import XQT_ERRORS_NAMESPACE
from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \
@ -53,8 +53,9 @@ def ordinal(n):
class XPathToken(Token):
"""Base class for XPath tokens."""
comment = None # for XPath 2.0+ comments
xsd_type = None # for XPath 2.0+ schema types labeling
is_root = False # Flag that is set to True for root token instances
comment = None # for XPath 2.0+ comments
xsd_type = None # for XPath 2.0+ schema types labeling
def evaluate(self, context=None):
"""
@ -423,33 +424,22 @@ class XPathToken(Token):
locale.setlocale(locale.LC_ALL, default_locale)
###
# XPath data conversion base functions
# XPath data accessors base functions
def data_value(self, obj):
"""
The typed value, as computed by fn:data() on each item. Returns an instance of
UntypedAtomic.
"""
if is_attribute_node(obj):
obj = obj[1]
if obj is None:
return
elif not is_xpath_node(obj):
return obj
elif hasattr(obj, 'type'):
return self.schema_node_value(obj) # Schema context
elif self.xsd_type is None:
return UntypedAtomic(self.string_value(obj))
# XSD type bound data
try:
if is_attribute_node(obj):
return self.xsd_type.decode(obj[1])
elif is_element_node(obj):
return self.xsd_type.decode(obj.text)
except TypeError as err:
self.wrong_type(str(err))
except ValueError as err:
self.wrong_value(str(err))
else:
return UntypedAtomic(self.string_value(obj))
return UntypedAtomic(self.string_value(obj))
def boolean_value(self, obj):
"""
@ -481,7 +471,7 @@ class XPathToken(Token):
elif is_element_node(obj):
return ''.join(elem_iter_strings(obj))
elif is_attribute_node(obj):
return obj[1]
return unicode_type(obj[1])
elif is_text_node(obj):
return obj
elif is_document_node(obj):

View File

@ -811,6 +811,9 @@ class XPath1ParserTest(unittest.TestCase):
self.check_value("1 and 1", True)
self.check_value("1 and 'jupiter'", True)
self.check_value("0 and 'mars'", False)
self.check_value("mars")
self.check_value("1 and mars", False)
def test_comparison_operators(self):
@ -1160,9 +1163,11 @@ class XPath1ParserTest(unittest.TestCase):
self.check_value("a[preceding::a[not(b)]]", [], context=XPathContext(root, item=root[1]))
def test_union(self):
root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2><B3/></A>')
root = self.etree.XML('<A min="1" max="10"><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2><B3/></A>')
self.check_selector('/A/B2 | /A/B1', root, root[:2])
self.check_selector('/A/B2 | /A/*', root, root[:])
self.check_selector('/A/B2 | /A/* | /A/B1', root, root[:])
self.check_selector('/A/@min | /A/@max', root, {'1', '10'})
def test_default_namespace(self):
root = self.etree.XML('<foo>bar</foo>')