Improve node sequence operators
- Add is_root class attribute to XPathToken - '(name)' token yields AttributeNode objects for non-root tokens
This commit is contained in:
parent
93c6521a23
commit
c367bdfd95
|
@ -198,6 +198,7 @@ class XPath1Parser(Parser):
|
|||
|
||||
def parse(self, source):
|
||||
root_token = super(XPath1Parser, self).parse(source)
|
||||
root_token.is_root = True
|
||||
try:
|
||||
root_token.evaluate() # Static context evaluation
|
||||
except MissingContextError:
|
||||
|
@ -233,56 +234,18 @@ literal('(string)')
|
|||
literal('(float)')
|
||||
literal('(decimal)')
|
||||
literal('(integer)')
|
||||
literal('(name)', bp=10)
|
||||
|
||||
|
||||
@method('(name)')
|
||||
@method(literal('(name)', bp=10))
|
||||
def evaluate(self, context=None):
|
||||
if context is None:
|
||||
return
|
||||
name = self.value
|
||||
if name[0] != '{' and self.parser.default_namespace:
|
||||
tag = u'{%s}%s' % (self.parser.default_namespace, name)
|
||||
else:
|
||||
tag = name
|
||||
|
||||
if isinstance(context, XPathSchemaContext):
|
||||
# Bind with the XSD type
|
||||
xsd_type = self.match_xsd_type(context.item, name)
|
||||
if xsd_type is not None:
|
||||
if isinstance(context.item, AttributeNode):
|
||||
primitive_type = self.parser.schema.get_primitive_type(xsd_type)
|
||||
return XSD_BUILTIN_TYPES[primitive_type.local_name].value
|
||||
else:
|
||||
return context.item
|
||||
|
||||
elif self.xsd_type is None:
|
||||
# Untyped evaluation
|
||||
if is_attribute_node(context.item, name):
|
||||
return context.item[1]
|
||||
elif is_element_node(context.item, tag):
|
||||
return context.item
|
||||
else:
|
||||
# XSD typed evaluation
|
||||
try:
|
||||
if is_attribute_node(context.item, name):
|
||||
return self.xsd_type.decode(context.item[1])
|
||||
elif is_element_node(context.item, tag):
|
||||
if self.xsd_type.is_simple():
|
||||
return self.xsd_type.decode(context.item.text)
|
||||
elif self.xsd_type.has_simple_content():
|
||||
self.xsd_type.decode(context.item.text)
|
||||
return context.item
|
||||
|
||||
except (TypeError, ValueError):
|
||||
msg = "Type {!r} is not appropriate for the context item {!r}"
|
||||
self.wrong_context_type(msg.format(self.xsd_type, context.item))
|
||||
return [x for x in self.select(context)] or None
|
||||
|
||||
|
||||
@method('(name)')
|
||||
def select(self, context=None):
|
||||
if context is None:
|
||||
return
|
||||
|
||||
name = self.value
|
||||
if name[0] != '{' and self.parser.default_namespace:
|
||||
tag = u'{%s}%s' % (self.parser.default_namespace, name)
|
||||
|
@ -304,7 +267,7 @@ def select(self, context=None):
|
|||
# Untyped selection
|
||||
for item in context.iter_children_or_self():
|
||||
if is_attribute_node(item, name):
|
||||
yield item[1]
|
||||
yield item
|
||||
elif is_element_node(item, tag):
|
||||
yield item
|
||||
else:
|
||||
|
@ -312,7 +275,7 @@ def select(self, context=None):
|
|||
for item in context.iter_children_or_self():
|
||||
try:
|
||||
if is_attribute_node(item, name):
|
||||
yield self.xsd_type.decode(item[1])
|
||||
yield AttributeNode(item[0], self.xsd_type.decode(item[1]))
|
||||
elif is_element_node(item, tag):
|
||||
if self.xsd_type.is_simple():
|
||||
yield self.xsd_type.decode(item.text)
|
||||
|
@ -652,15 +615,27 @@ def evaluate(self, context=None):
|
|||
|
||||
###
|
||||
# Union expressions
|
||||
@method(infix('|', bp=50))
|
||||
@method('|', bp=50)
|
||||
def led(self, left):
|
||||
self.cut_and_sort = True
|
||||
if left.symbol in {'|', 'union'}:
|
||||
left.cut_and_sort = False
|
||||
self[:] = left, self.parser.expression(rbp=50)
|
||||
return self
|
||||
|
||||
|
||||
@method('|')
|
||||
def select(self, context=None):
|
||||
if context is not None:
|
||||
results = {item for k in range(2) for item in self[k].select(context.copy())}
|
||||
for item in context.iter():
|
||||
if item in results:
|
||||
if context is None:
|
||||
return
|
||||
elif not self.cut_and_sort:
|
||||
for k in range(2):
|
||||
for item in self[k].select(context.copy()):
|
||||
yield item
|
||||
elif is_attribute_node(item) and item[1] in results:
|
||||
yield item[1]
|
||||
else:
|
||||
results = {item for k in range(2) for item in self[k].select(context.copy())}
|
||||
for item in context.iter_results(results, self.is_root):
|
||||
yield item
|
||||
|
||||
|
||||
###
|
||||
|
@ -705,8 +680,8 @@ def select(self, context=None):
|
|||
left_results = list(self[0].select(context))
|
||||
context.size = len(left_results)
|
||||
for context.position, context.item in enumerate(left_results):
|
||||
if not is_element_node(context.item):
|
||||
self.wrong_type("left operand must returns element nodes: {}".format(context.item))
|
||||
if not is_xpath_node(context.item):
|
||||
self.wrong_type("left operand must returns XPath nodes: {}".format(context.item))
|
||||
for result in self[1].select(context):
|
||||
if is_etree_element(result) or isinstance(result, tuple):
|
||||
if result not in items:
|
||||
|
@ -878,7 +853,7 @@ def select(self, context=None):
|
|||
|
||||
for _ in context.iter_attributes():
|
||||
for result in self[0].select(context):
|
||||
yield result
|
||||
yield result[1] if self.is_root else result
|
||||
|
||||
|
||||
@method(axis('namespace'))
|
||||
|
|
|
@ -400,9 +400,9 @@ def select(self, context=None):
|
|||
for result in self[0].select(context):
|
||||
yield result
|
||||
else:
|
||||
attribute_name = self[0].evaluate(context) if self else None
|
||||
name = self[0].evaluate(context) if self else None
|
||||
for item in context.iter_attributes():
|
||||
if is_attribute_node(item, attribute_name):
|
||||
if is_attribute_node(item, name):
|
||||
yield context.item[1]
|
||||
|
||||
|
||||
|
|
|
@ -646,7 +646,7 @@ def evaluate(self, context=None):
|
|||
|
||||
@method(function('string-join', nargs=2))
|
||||
def evaluate(self, context=None):
|
||||
items = [self.string_value(s) if is_element_node(s) else s
|
||||
items = [self.string_value(s) if is_element_node(s) or is_attribute_node(s) else s
|
||||
for s in self[0].select(context)]
|
||||
try:
|
||||
return self.get_argument(context, 1, cls=string_base_type).join(items)
|
||||
|
|
|
@ -339,6 +339,7 @@ class XPath2Parser(XPath1Parser):
|
|||
|
||||
def parse(self, source):
|
||||
root_token = super(XPath1Parser, self).parse(source)
|
||||
root_token.is_root = True
|
||||
|
||||
if self.schema is None:
|
||||
try:
|
||||
|
@ -381,34 +382,25 @@ register('?')
|
|||
register('(:')
|
||||
register(':)')
|
||||
|
||||
|
||||
###
|
||||
# Node sequence composition
|
||||
@method(infix('union', bp=50))
|
||||
def select(self, context=None):
|
||||
if context is not None:
|
||||
results = {item for k in range(2) for item in self[k].select(context.copy())}
|
||||
for item in context.iter():
|
||||
if item in results:
|
||||
yield item
|
||||
XPath2Parser.duplicate('|', 'union')
|
||||
|
||||
|
||||
@method(infix('intersect', bp=55))
|
||||
def select(self, context=None):
|
||||
if context is not None:
|
||||
results = set(self[0].select(context.copy())) & set(self[1].select(context.copy()))
|
||||
for item in context.iter():
|
||||
if item in results:
|
||||
yield item
|
||||
for item in context.iter_results(results, self.is_root):
|
||||
yield item
|
||||
|
||||
|
||||
@method(infix('except', bp=55))
|
||||
def select(self, context=None):
|
||||
if context is not None:
|
||||
results = set(self[0].select(context.copy())) - set(self[1].select(context.copy()))
|
||||
for item in context.iter():
|
||||
if item in results:
|
||||
yield item
|
||||
for item in context.iter_results(results, self.is_root):
|
||||
yield item
|
||||
|
||||
|
||||
###
|
||||
|
|
|
@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types.
|
|||
"""
|
||||
from collections import namedtuple
|
||||
|
||||
from .compat import PY3, urlparse
|
||||
from .compat import PY3, urlparse, unicode_type
|
||||
from .namespaces import XML_BASE, XSI_NIL
|
||||
from .exceptions import ElementPathValueError
|
||||
from .datatypes import ncname_validator
|
||||
|
@ -23,6 +23,9 @@ from .datatypes import ncname_validator
|
|||
AttributeNode = namedtuple('Attribute', 'name value')
|
||||
"""A namedtuple-based type to represent XPath attributes."""
|
||||
|
||||
ElementNode = namedtuple('Element', 'tag text attrib')
|
||||
"""A namedtuple-based type to represent XPath element simple and simple-content nodes."""
|
||||
|
||||
NamespaceNode = namedtuple('Namespace', 'prefix uri')
|
||||
"""A namedtuple-based type to represent XPath namespaces."""
|
||||
|
||||
|
@ -34,11 +37,15 @@ def is_etree_element(obj):
|
|||
|
||||
|
||||
def elem_iter_strings(elem):
|
||||
for e in elem.iter():
|
||||
if e.text is not None:
|
||||
yield e.text
|
||||
if e.tail is not None and e is not elem:
|
||||
yield e.tail
|
||||
if isinstance(elem, ElementNode):
|
||||
if elem.text is not None:
|
||||
yield unicode_type(elem.text)
|
||||
else:
|
||||
for e in elem.iter():
|
||||
if e.text is not None:
|
||||
yield e.text
|
||||
if e.tail is not None and e is not elem:
|
||||
yield e.tail
|
||||
|
||||
|
||||
###
|
||||
|
@ -51,7 +58,7 @@ def elem_iter_strings(elem):
|
|||
# Element-like objects are used for representing elements and comments, ElementTree-like objects
|
||||
# for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
|
||||
###
|
||||
def is_element_node(obj, tag=None, default_namespace=None):
|
||||
def is_element_node(obj, tag=None):
|
||||
"""
|
||||
Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
|
||||
Raises a ValueError if the argument tag has to be used but it's in a wrong format.
|
||||
|
|
|
@ -24,7 +24,7 @@ import locale
|
|||
import contextlib
|
||||
from decimal import Decimal
|
||||
|
||||
from .compat import string_base_type
|
||||
from .compat import string_base_type, unicode_type
|
||||
from .exceptions import xpath_error
|
||||
from .namespaces import XQT_ERRORS_NAMESPACE
|
||||
from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \
|
||||
|
@ -53,8 +53,9 @@ def ordinal(n):
|
|||
class XPathToken(Token):
|
||||
"""Base class for XPath tokens."""
|
||||
|
||||
comment = None # for XPath 2.0+ comments
|
||||
xsd_type = None # for XPath 2.0+ schema types labeling
|
||||
is_root = False # Flag that is set to True for root token instances
|
||||
comment = None # for XPath 2.0+ comments
|
||||
xsd_type = None # for XPath 2.0+ schema types labeling
|
||||
|
||||
def evaluate(self, context=None):
|
||||
"""
|
||||
|
@ -423,33 +424,22 @@ class XPathToken(Token):
|
|||
locale.setlocale(locale.LC_ALL, default_locale)
|
||||
|
||||
###
|
||||
# XPath data conversion base functions
|
||||
# XPath data accessors base functions
|
||||
def data_value(self, obj):
|
||||
"""
|
||||
The typed value, as computed by fn:data() on each item. Returns an instance of
|
||||
UntypedAtomic.
|
||||
"""
|
||||
if is_attribute_node(obj):
|
||||
obj = obj[1]
|
||||
|
||||
if obj is None:
|
||||
return
|
||||
elif not is_xpath_node(obj):
|
||||
return obj
|
||||
elif hasattr(obj, 'type'):
|
||||
return self.schema_node_value(obj) # Schema context
|
||||
elif self.xsd_type is None:
|
||||
return UntypedAtomic(self.string_value(obj))
|
||||
|
||||
# XSD type bound data
|
||||
try:
|
||||
if is_attribute_node(obj):
|
||||
return self.xsd_type.decode(obj[1])
|
||||
elif is_element_node(obj):
|
||||
return self.xsd_type.decode(obj.text)
|
||||
except TypeError as err:
|
||||
self.wrong_type(str(err))
|
||||
except ValueError as err:
|
||||
self.wrong_value(str(err))
|
||||
else:
|
||||
return UntypedAtomic(self.string_value(obj))
|
||||
return UntypedAtomic(self.string_value(obj))
|
||||
|
||||
def boolean_value(self, obj):
|
||||
"""
|
||||
|
@ -481,7 +471,7 @@ class XPathToken(Token):
|
|||
elif is_element_node(obj):
|
||||
return ''.join(elem_iter_strings(obj))
|
||||
elif is_attribute_node(obj):
|
||||
return obj[1]
|
||||
return unicode_type(obj[1])
|
||||
elif is_text_node(obj):
|
||||
return obj
|
||||
elif is_document_node(obj):
|
||||
|
|
|
@ -811,6 +811,9 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.check_value("1 and 1", True)
|
||||
self.check_value("1 and 'jupiter'", True)
|
||||
self.check_value("0 and 'mars'", False)
|
||||
|
||||
self.check_value("mars")
|
||||
|
||||
self.check_value("1 and mars", False)
|
||||
|
||||
def test_comparison_operators(self):
|
||||
|
@ -1160,9 +1163,11 @@ class XPath1ParserTest(unittest.TestCase):
|
|||
self.check_value("a[preceding::a[not(b)]]", [], context=XPathContext(root, item=root[1]))
|
||||
|
||||
def test_union(self):
|
||||
root = self.etree.XML('<A><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2><B3/></A>')
|
||||
root = self.etree.XML('<A min="1" max="10"><B1><C1/><C2/><C3/></B1><B2><C1/><C2/><C3/><C4/></B2><B3/></A>')
|
||||
self.check_selector('/A/B2 | /A/B1', root, root[:2])
|
||||
self.check_selector('/A/B2 | /A/*', root, root[:])
|
||||
self.check_selector('/A/B2 | /A/* | /A/B1', root, root[:])
|
||||
self.check_selector('/A/@min | /A/@max', root, {'1', '10'})
|
||||
|
||||
def test_default_namespace(self):
|
||||
root = self.etree.XML('<foo>bar</foo>')
|
||||
|
|
Loading…
Reference in New Issue