diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py
index 1b427a0..b815d28 100644
--- a/elementpath/xpath1_parser.py
+++ b/elementpath/xpath1_parser.py
@@ -198,6 +198,7 @@ class XPath1Parser(Parser):
def parse(self, source):
root_token = super(XPath1Parser, self).parse(source)
+ root_token.is_root = True
try:
root_token.evaluate() # Static context evaluation
except MissingContextError:
@@ -233,56 +234,18 @@ literal('(string)')
literal('(float)')
literal('(decimal)')
literal('(integer)')
-literal('(name)', bp=10)
-@method('(name)')
+@method(literal('(name)', bp=10))
def evaluate(self, context=None):
- if context is None:
- return
- name = self.value
- if name[0] != '{' and self.parser.default_namespace:
- tag = u'{%s}%s' % (self.parser.default_namespace, name)
- else:
- tag = name
-
- if isinstance(context, XPathSchemaContext):
- # Bind with the XSD type
- xsd_type = self.match_xsd_type(context.item, name)
- if xsd_type is not None:
- if isinstance(context.item, AttributeNode):
- primitive_type = self.parser.schema.get_primitive_type(xsd_type)
- return XSD_BUILTIN_TYPES[primitive_type.local_name].value
- else:
- return context.item
-
- elif self.xsd_type is None:
- # Untyped evaluation
- if is_attribute_node(context.item, name):
- return context.item[1]
- elif is_element_node(context.item, tag):
- return context.item
- else:
- # XSD typed evaluation
- try:
- if is_attribute_node(context.item, name):
- return self.xsd_type.decode(context.item[1])
- elif is_element_node(context.item, tag):
- if self.xsd_type.is_simple():
- return self.xsd_type.decode(context.item.text)
- elif self.xsd_type.has_simple_content():
- self.xsd_type.decode(context.item.text)
- return context.item
-
- except (TypeError, ValueError):
- msg = "Type {!r} is not appropriate for the context item {!r}"
- self.wrong_context_type(msg.format(self.xsd_type, context.item))
+ return [x for x in self.select(context)] or None
@method('(name)')
def select(self, context=None):
if context is None:
return
+
name = self.value
if name[0] != '{' and self.parser.default_namespace:
tag = u'{%s}%s' % (self.parser.default_namespace, name)
@@ -304,7 +267,7 @@ def select(self, context=None):
# Untyped selection
for item in context.iter_children_or_self():
if is_attribute_node(item, name):
- yield item[1]
+ yield item
elif is_element_node(item, tag):
yield item
else:
@@ -312,7 +275,7 @@ def select(self, context=None):
for item in context.iter_children_or_self():
try:
if is_attribute_node(item, name):
- yield self.xsd_type.decode(item[1])
+ yield AttributeNode(item[0], self.xsd_type.decode(item[1]))
elif is_element_node(item, tag):
if self.xsd_type.is_simple():
yield self.xsd_type.decode(item.text)
@@ -652,15 +615,27 @@ def evaluate(self, context=None):
###
# Union expressions
-@method(infix('|', bp=50))
+@method('|', bp=50)
+def led(self, left):
+ self.cut_and_sort = True
+ if left.symbol in {'|', 'union'}:
+ left.cut_and_sort = False
+ self[:] = left, self.parser.expression(rbp=50)
+ return self
+
+
+@method('|')
def select(self, context=None):
- if context is not None:
- results = {item for k in range(2) for item in self[k].select(context.copy())}
- for item in context.iter():
- if item in results:
+ if context is None:
+ return
+ elif not self.cut_and_sort:
+ for k in range(2):
+ for item in self[k].select(context.copy()):
yield item
- elif is_attribute_node(item) and item[1] in results:
- yield item[1]
+ else:
+ results = {item for k in range(2) for item in self[k].select(context.copy())}
+ for item in context.iter_results(results, self.is_root):
+ yield item
###
@@ -705,8 +680,8 @@ def select(self, context=None):
left_results = list(self[0].select(context))
context.size = len(left_results)
for context.position, context.item in enumerate(left_results):
- if not is_element_node(context.item):
- self.wrong_type("left operand must returns element nodes: {}".format(context.item))
+ if not is_xpath_node(context.item):
+ self.wrong_type("left operand must returns XPath nodes: {}".format(context.item))
for result in self[1].select(context):
if is_etree_element(result) or isinstance(result, tuple):
if result not in items:
@@ -878,7 +853,7 @@ def select(self, context=None):
for _ in context.iter_attributes():
for result in self[0].select(context):
- yield result
+ yield result[1] if self.is_root else result
@method(axis('namespace'))
diff --git a/elementpath/xpath2_constructors.py b/elementpath/xpath2_constructors.py
index 38b7949..f017e9c 100644
--- a/elementpath/xpath2_constructors.py
+++ b/elementpath/xpath2_constructors.py
@@ -400,9 +400,9 @@ def select(self, context=None):
for result in self[0].select(context):
yield result
else:
- attribute_name = self[0].evaluate(context) if self else None
+ name = self[0].evaluate(context) if self else None
for item in context.iter_attributes():
- if is_attribute_node(item, attribute_name):
+ if is_attribute_node(item, name):
yield context.item[1]
diff --git a/elementpath/xpath2_functions.py b/elementpath/xpath2_functions.py
index 3d12786..e80a801 100644
--- a/elementpath/xpath2_functions.py
+++ b/elementpath/xpath2_functions.py
@@ -646,7 +646,7 @@ def evaluate(self, context=None):
@method(function('string-join', nargs=2))
def evaluate(self, context=None):
- items = [self.string_value(s) if is_element_node(s) else s
+ items = [self.string_value(s) if is_element_node(s) or is_attribute_node(s) else s
for s in self[0].select(context)]
try:
return self.get_argument(context, 1, cls=string_base_type).join(items)
diff --git a/elementpath/xpath2_parser.py b/elementpath/xpath2_parser.py
index 07e05d3..000071e 100644
--- a/elementpath/xpath2_parser.py
+++ b/elementpath/xpath2_parser.py
@@ -339,6 +339,7 @@ class XPath2Parser(XPath1Parser):
def parse(self, source):
root_token = super(XPath1Parser, self).parse(source)
+ root_token.is_root = True
if self.schema is None:
try:
@@ -381,34 +382,25 @@ register('?')
register('(:')
register(':)')
-
###
# Node sequence composition
-@method(infix('union', bp=50))
-def select(self, context=None):
- if context is not None:
- results = {item for k in range(2) for item in self[k].select(context.copy())}
- for item in context.iter():
- if item in results:
- yield item
+XPath2Parser.duplicate('|', 'union')
@method(infix('intersect', bp=55))
def select(self, context=None):
if context is not None:
results = set(self[0].select(context.copy())) & set(self[1].select(context.copy()))
- for item in context.iter():
- if item in results:
- yield item
+ for item in context.iter_results(results, self.is_root):
+ yield item
@method(infix('except', bp=55))
def select(self, context=None):
if context is not None:
results = set(self[0].select(context.copy())) - set(self[1].select(context.copy()))
- for item in context.iter():
- if item in results:
- yield item
+ for item in context.iter_results(results, self.is_root):
+ yield item
###
diff --git a/elementpath/xpath_nodes.py b/elementpath/xpath_nodes.py
index eb1f4ed..85f15c4 100644
--- a/elementpath/xpath_nodes.py
+++ b/elementpath/xpath_nodes.py
@@ -13,7 +13,7 @@ Helper functions for XPath nodes and basic data types.
"""
from collections import namedtuple
-from .compat import PY3, urlparse
+from .compat import PY3, urlparse, unicode_type
from .namespaces import XML_BASE, XSI_NIL
from .exceptions import ElementPathValueError
from .datatypes import ncname_validator
@@ -23,6 +23,9 @@ from .datatypes import ncname_validator
AttributeNode = namedtuple('Attribute', 'name value')
"""A namedtuple-based type to represent XPath attributes."""
+ElementNode = namedtuple('Element', 'tag text attrib')
+"""A namedtuple-based type to represent XPath elements of simple type or simple content."""
+
NamespaceNode = namedtuple('Namespace', 'prefix uri')
"""A namedtuple-based type to represent XPath namespaces."""
@@ -34,11 +37,15 @@ def is_etree_element(obj):
def elem_iter_strings(elem):
- for e in elem.iter():
- if e.text is not None:
- yield e.text
- if e.tail is not None and e is not elem:
- yield e.tail
+ if isinstance(elem, ElementNode):
+ if elem.text is not None:
+ yield unicode_type(elem.text)
+ else:
+ for e in elem.iter():
+ if e.text is not None:
+ yield e.text
+ if e.tail is not None and e is not elem:
+ yield e.tail
###
@@ -51,7 +58,7 @@ def elem_iter_strings(elem):
# Element-like objects are used for representing elements and comments, ElementTree-like objects
# for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
###
-def is_element_node(obj, tag=None, default_namespace=None):
+def is_element_node(obj, tag=None):
"""
Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
Raises a ValueError if the argument tag has to be used but it's in a wrong format.
diff --git a/elementpath/xpath_token.py b/elementpath/xpath_token.py
index f1bf231..07cf85e 100644
--- a/elementpath/xpath_token.py
+++ b/elementpath/xpath_token.py
@@ -24,7 +24,7 @@ import locale
import contextlib
from decimal import Decimal
-from .compat import string_base_type
+from .compat import string_base_type, unicode_type
from .exceptions import xpath_error
from .namespaces import XQT_ERRORS_NAMESPACE
from .xpath_nodes import AttributeNode, is_etree_element, is_attribute_node, \
@@ -53,8 +53,9 @@ def ordinal(n):
class XPathToken(Token):
"""Base class for XPath tokens."""
- comment = None # for XPath 2.0+ comments
- xsd_type = None # fox XPath 2.0+ schema types labeling
+ is_root = False # Flag that is set to True for root token instances
+ comment = None # for XPath 2.0+ comments
+ xsd_type = None # for XPath 2.0+ schema types labeling
def evaluate(self, context=None):
"""
@@ -423,33 +424,22 @@ class XPathToken(Token):
locale.setlocale(locale.LC_ALL, default_locale)
###
- # XPath data conversion base functions
+ # XPath data accessors base functions
def data_value(self, obj):
"""
The typed value, as computed by fn:data() on each item. Returns an instance of
UntypedAtomic.
"""
+ if is_attribute_node(obj):
+ obj = obj[1]
+
if obj is None:
return
elif not is_xpath_node(obj):
return obj
elif hasattr(obj, 'type'):
return self.schema_node_value(obj) # Schema context
- elif self.xsd_type is None:
- return UntypedAtomic(self.string_value(obj))
-
- # XSD type bound data
- try:
- if is_attribute_node(obj):
- return self.xsd_type.decode(obj[1])
- elif is_element_node(obj):
- return self.xsd_type.decode(obj.text)
- except TypeError as err:
- self.wrong_type(str(err))
- except ValueError as err:
- self.wrong_value(str(err))
- else:
- return UntypedAtomic(self.string_value(obj))
+ return UntypedAtomic(self.string_value(obj))
def boolean_value(self, obj):
"""
@@ -481,7 +471,7 @@ class XPathToken(Token):
elif is_element_node(obj):
return ''.join(elem_iter_strings(obj))
elif is_attribute_node(obj):
- return obj[1]
+ return unicode_type(obj[1])
elif is_text_node(obj):
return obj
elif is_document_node(obj):
diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py
index 2121d04..ab39cba 100644
--- a/tests/test_xpath1_parser.py
+++ b/tests/test_xpath1_parser.py
@@ -811,6 +811,9 @@ class XPath1ParserTest(unittest.TestCase):
self.check_value("1 and 1", True)
self.check_value("1 and 'jupiter'", True)
self.check_value("0 and 'mars'", False)
+
+ self.check_value("mars")
+
self.check_value("1 and mars", False)
def test_comparison_operators(self):
@@ -1160,9 +1163,11 @@ class XPath1ParserTest(unittest.TestCase):
self.check_value("a[preceding::a[not(b)]]", [], context=XPathContext(root, item=root[1]))
def test_union(self):
- root = self.etree.XML('')
+ root = self.etree.XML('')
self.check_selector('/A/B2 | /A/B1', root, root[:2])
self.check_selector('/A/B2 | /A/*', root, root[:])
+ self.check_selector('/A/B2 | /A/* | /A/B1', root, root[:])
+ self.check_selector('/A/@min | /A/@max', root, {'1', '10'})
def test_default_namespace(self):
root = self.etree.XML('bar')