Split xpath_helpers module in two parts

- Datatype conversion helpers moved to token class - Node helpers moved to a new module - Kept boolean_value() for backward compatibility
2019-08-03 22:53:37 +02:00 · 2019-08-03 22:53:37 +02:00 · aeba3aab05
parent ad96ad7e7e
commit aeba3aab05
15 changed files with 406 additions and 376 deletions
--- a/doc/conf.py
+++ b/doc/conf.py
@ -29,9 +29,9 @@ copyright = '2018-2019, SISSA (International School for Advanced Studies)'
 author = 'Davide Brunato'

 # The short X.Y version
-version = '1.1'
+version = '1.2'
 # The full version, including alpha/beta/rc tags
-release = '1.1.9'
+release = '1.2.0'


 # -- General configuration ---------------------------------------------------
--- a/elementpath/init.py
+++ b/elementpath/init.py
@ -8,7 +8,7 @@
 #
 # @author Davide Brunato <brunato@sissa.it>
 #
-__version__ = '1.1.9'
+__version__ = '1.2.0'
 __author__ = "Davide Brunato"
 __contact__ = "brunato@sissa.it"
 __copyright__ = "Copyright 2018-2019, SISSA"
@ -23,7 +23,7 @@ from .exceptions import ElementPathError, MissingContextError, \
 from . import datatypes
 from .tdop_parser import Token, Parser
 from .xpath_context import XPathContext, XPathSchemaContext
-from .xpath_helpers import AttributeNode, NamespaceNode
+from .xpath_nodes import AttributeNode, NamespaceNode
 from .xpath_token import XPathToken
 from .xpath1_parser import XPath1Parser
 from .xpath2_constructors import XPath2Parser
--- a/elementpath/schema_proxy.py
+++ b/elementpath/schema_proxy.py
@ -12,7 +12,7 @@ from abc import ABCMeta, abstractmethod
 from .compat import add_metaclass
 from .exceptions import ElementPathTypeError, ElementPathValueError
 from .namespaces import XSD_NAMESPACE
-from .xpath_helpers import is_etree_element
+from .xpath_nodes import is_etree_element
 from .xpath_context import XPathSchemaContext


--- a/elementpath/xpath1_parser.py
+++ b/elementpath/xpath1_parser.py
@ -13,17 +13,17 @@ import math
 import decimal

 from .compat import PY3, string_base_type
-from .exceptions import ElementPathSyntaxError, ElementPathTypeError, ElementPathNameError, \
-    MissingContextError
+from .exceptions import ElementPathSyntaxError, ElementPathTypeError, \
+    ElementPathNameError, MissingContextError
 from .datatypes import UntypedAtomic, DayTimeDuration, YearMonthDuration, XSD_BUILTIN_TYPES
 from .xpath_context import XPathSchemaContext
 from .tdop_parser import Parser, MultiLabel
 from .namespaces import XML_ID, XML_LANG, XPATH_1_DEFAULT_NAMESPACES, \
    XPATH_FUNCTIONS_NAMESPACE, XSD_NAMESPACE, qname_to_prefixed
 from .xpath_token import XPathToken
-from .xpath_helpers import AttributeNode, NamespaceNode, is_etree_element, is_xpath_node, is_element_node, \
-    is_document_node, is_attribute_node, is_text_node, is_comment_node, is_processing_instruction_node, \
-    node_name, node_string_value, boolean_value, data_value, string_value, number_value
+from .xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, is_xpath_node, \
+    is_element_node, is_document_node, is_attribute_node, is_text_node, is_comment_node, \
+    is_processing_instruction_node, node_name, node_string_value

 XML_NAME_CHARACTER = (u"A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
                      u"\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD")
@ -687,7 +687,7 @@ def evaluate(self, context=None):
            try:
                next(selector)
            except StopIteration:
-                return data_value(value)
+                return self.data_value(value)
            else:
                self.wrong_context_type("atomized operand is a sequence of length greater than one")

@ -730,7 +730,7 @@ def select(self, context=None):
                    isinstance(predicate[0], (int, float)):
                if context.position == predicate[0] - 1:
                    yield context.item
-            elif boolean_value(predicate, self):
+            elif self.boolean_value(predicate):
                yield context.item


@ -1008,7 +1008,7 @@ def evaluate(self, context=None):
 # String functions
@method(function('string', nargs=1))
 def evaluate(self, context=None):
-    return string_value(self.get_argument(context))
+    return self.string_value(self.get_argument(context))


@method(function('contains', nargs=2))
@ -1020,7 +1020,8 @@ def evaluate(self, context=None):

@method(function('concat'))
 def evaluate(self, context=None):
-    return ''.join(string_value(self.get_argument(context, index=k)) for k in range(len(self)))
+    return ''.join(self.string_value(self.get_argument(context, index=k))
+                   for k in range(len(self)))


@method(function('string-length', nargs=1))
@ -1126,12 +1127,12 @@ def evaluate(self, context=None):
 # Boolean functions
@method(function('boolean', nargs=1))
 def evaluate(self, context=None):
-    return boolean_value(self[0].get_results(context), self)
+    return self.boolean_value(self[0].get_results(context))


@method(function('not', nargs=1))
 def evaluate(self, context=None):
-    return not boolean_value(self[0].get_results(context), self)
+    return not self.boolean_value(self[0].get_results(context))


@method(function('true', nargs=0))
@ -1179,7 +1180,8 @@ def evaluate(self, context=None):

@method(function('sum', nargs=(1, 2)))
 def evaluate(self, context=None):
-    values = [number_value(x) if isinstance(x, UntypedAtomic) else x for x in self[0].select(context)]
+    values = [self.number_value(x) if isinstance(x, UntypedAtomic) else x
+              for x in self[0].select(context)]
    if not values:
        zero = 0 if len(self) == 1 else self.get_argument(context, index=1)
        return [] if zero is None else zero
@ -1191,7 +1193,7 @@ def evaluate(self, context=None):
        return sum(values)

    try:
-        return sum(number_value(x) for x in values)
+        return sum(self.number_value(x) for x in values)
    except TypeError:
        if self.parser.version == '1.0':
            return float('nan')
@ -1205,7 +1207,7 @@ def evaluate(self, context=None):
    if arg is None:
        return float('nan') if self.parser.version == '1.0' else []
    elif is_xpath_node(arg) or self.parser.compatibility_mode:
-        arg = number_value(arg)
+        arg = self.number_value(arg)

    if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)):
        return arg
@ -1222,7 +1224,7 @@ def evaluate(self, context=None):
    if arg is None:
        return float('nan') if self.parser.version == '1.0' else []
    elif is_xpath_node(arg) or self.parser.compatibility_mode:
-        arg = number_value(arg)
+        arg = self.number_value(arg)

    if isinstance(arg, float) and (math.isnan(arg) or math.isinf(arg)):
        return arg
--- a/elementpath/xpath2_constructors.py
+++ b/elementpath/xpath2_constructors.py
@ -16,7 +16,7 @@ import codecs

 from .compat import unicode_type, urlparse, URLError, string_base_type
 from .exceptions import ElementPathError, xpath_error
-from .xpath_helpers import is_attribute_node, boolean_value, string_value
+from .xpath_nodes import is_attribute_node
 from .datatypes import DateTime10, Date10, Time, XPathGregorianDay, XPathGregorianMonth, \
    XPathGregorianMonthDay, XPathGregorianYear, XPathGregorianYearMonth, UntypedAtomic, Duration, \
    YearMonthDuration, DayTimeDuration, WHITESPACES_PATTERN, QNAME_PATTERN, NMTOKEN_PATTERN, NAME_PATTERN, \
@ -438,7 +438,7 @@ def nud(self):
@method('boolean')
 def evaluate(self, context=None):
    if self.label == 'function':
-        return boolean_value(self[0].get_results(context), self)
+        return self.boolean_value(self[0].get_results(context))

    # xs:boolean constructor
    item = self.get_argument(context)
@ -469,7 +469,7 @@ def nud(self):
@method('string')
 def evaluate(self, context=None):
    if self.label == 'function':
-        return string_value(self.get_argument(context))
+        return self.string_value(self.get_argument(context))
    else:
        item = self.get_argument(context)
        return [] if item is None else str(item)
--- a/elementpath/xpath2_functions.py
+++ b/elementpath/xpath2_functions.py
@ -24,8 +24,8 @@ from .compat import PY3, string_base_type, unicode_chr, urlparse, urljoin, urlli
 from .datatypes import QNAME_PATTERN, DateTime10, Date10, Time, Timezone, Duration, DayTimeDuration
 from .namespaces import prefixed_to_qname, get_namespace
 from .xpath_context import XPathSchemaContext
-from .xpath_helpers import is_document_node, is_xpath_node, is_element_node, is_attribute_node, \
-    node_name, node_string_value, node_nilled, node_base_uri, node_document_uri, data_value, string_value
+from .xpath_nodes import is_document_node, is_xpath_node, is_element_node, is_attribute_node, \
+    node_name, node_string_value, node_nilled, node_base_uri, node_document_uri
 from .xpath2_parser import XPath2Parser

 method = XPath2Parser.method
@ -245,7 +245,7 @@ def evaluate(self, context=None):
@method(function('data', nargs=1))
 def select(self, context=None):
    for item in self[0].select(context):
-        value = data_value(item)
+        value = self.data_value(item)
        if value is None:
            raise self.error('FOTY0012', "argument node does not have a typed value: %r" % item)
        else:
@ -380,7 +380,7 @@ def select(self, context=None):
    nan = False
    results = []
    for item in self[0].select(context):
-        value = data_value(item)
+        value = self.data_value(item)
        if context is not None:
            context.item = value
        if not nan and isinstance(value, float) and math.isnan(value):
@ -440,7 +440,7 @@ def select(self, context=None):

@method(function('unordered', nargs=1))
 def select(self, context=None):
-    for result in sorted(list(self[0].select(context)), key=lambda x: string_value(x)):
+    for result in sorted(list(self[0].select(context)), key=lambda x: self.string_value(x)):
        yield result


@ -646,7 +646,7 @@ def evaluate(self, context=None):

@method(function('string-join', nargs=2))
 def evaluate(self, context=None):
-    items = [string_value(s) if is_element_node(s) else s
+    items = [self.string_value(s) if is_element_node(s) else s
             for s in self[0].select(context)]
    try:
        return self.get_argument(context, 1, cls=string_base_type).join(items)
--- a/elementpath/xpath2_parser.py
+++ b/elementpath/xpath2_parser.py
@ -23,7 +23,7 @@ from .exceptions import ElementPathError, ElementPathTypeError, MissingContextEr
 from .namespaces import XSD_NAMESPACE, XPATH_FUNCTIONS_NAMESPACE, XPATH_2_DEFAULT_NAMESPACES, \
    XSD_NOTATION, XSD_ANY_ATOMIC_TYPE, get_namespace, qname_to_prefixed, prefixed_to_qname
 from .datatypes import XSD_BUILTIN_TYPES
-from .xpath_helpers import is_xpath_node, boolean_value
+from .xpath_nodes import is_xpath_node
 from .tdop_parser import create_tokenizer
 from .xpath1_parser import XML_NCNAME_PATTERN, XPath1Parser
 from .xpath_context import XPathSchemaContext
@ -257,7 +257,7 @@ class XPath2Parser(XPath1Parser):
            except TypeError as err:
                raise self.error('FORG0006', str(err))

-        def cast(value):
+        def cast(_value):
            raise NotImplementedError

        pattern = r'\b%s(?=\s*\(|\s*\(\:.*\:\)\()' % symbol
@ -413,7 +413,7 @@ def nud(self):

@method('if')
 def evaluate(self, context=None):
-    if boolean_value(self[0].evaluate(context), self):
+    if self.boolean_value(self[0].evaluate(context)):
        return self[1].evaluate(context)
    else:
        return self[2].evaluate(context)
@ -421,7 +421,7 @@ def evaluate(self, context=None):

@method('if')
 def select(self, context=None):
-    if boolean_value(list(self[0].select(context)), self):
+    if self.boolean_value(list(self[0].select(context))):
        for result in self[1].select(context):
            yield result
    else:
@ -462,7 +462,7 @@ def evaluate(self, context=None):
    for results in product(*selectors):
        for i in range(len(results)):
            context.variables[self[i * 2][0].value] = results[i]
-        if boolean_value(list(self[-1].select(context.copy())), self):
+        if self.boolean_value(list(self[-1].select(context.copy()))):
            if some:
                return True
        elif not some:
@ -608,8 +608,8 @@ def evaluate(self, context=None):
        raise self.error('XPST0080')

    namespace = get_namespace(atomic_type)
-    if namespace != XSD_NAMESPACE and self.parser.schema is None or \
-            self.parser.schema.get_type(atomic_type) is None:
+    if namespace != XSD_NAMESPACE and \
+            (self.parser.schema is None or self.parser.schema.get_type(atomic_type) is None):
        self.missing_schema("type %r not found in schema" % atomic_type)

    result = [res for res in self[0].select(context)]
--- a/elementpath/xpath_context.py
+++ b/elementpath/xpath_context.py
@ -11,7 +11,7 @@
 import datetime

 from .exceptions import ElementPathTypeError
-from .xpath_helpers import AttributeNode, is_etree_element, is_element_node, \
+from .xpath_nodes import AttributeNode, is_etree_element, is_element_node, \
    is_document_node, is_attribute_node


--- a/elementpath/xpath_helpers.py
+++ b/elementpath/xpath_helpers.py
@ -8,258 +8,14 @@
 #
 # @author Davide Brunato <brunato@sissa.it>
 #
-"""
-Helper functions for XPath nodes and basic data types.
-"""
-from collections import namedtuple
-
-from .compat import PY3, urlparse
-from .namespaces import XML_BASE, XSI_NIL, XSD_UNTYPED, XSD_UNTYPED_ATOMIC
-from .exceptions import ElementPathValueError, xpath_error
-from .datatypes import UntypedAtomic, ncname_validator
-
-###
-# Node types
-AttributeNode = namedtuple('Attribute', 'name value')
-"""A namedtuple-based type to represent XPath attributes."""
-
-NamespaceNode = namedtuple('Namespace', 'prefix uri')
-"""A namedtuple-based type to represent XPath namespaces."""
+from .exceptions import xpath_error
+from .xpath_nodes import is_element_node


-###
-# Utility functions for ElementTree's Element instances
-def is_etree_element(obj):
-    return hasattr(obj, 'tag') and hasattr(obj, 'attrib') and hasattr(obj, 'text')
-
-
-def elem_iter_strings(elem):
-    for e in elem.iter():
-        if e.text is not None:
-            yield e.text
-        if e.tail is not None and e is not elem:
-            yield e.tail
-
-
-###
-# XPath node test functions
-#
-# XPath has there are 7 kinds of nodes:
-#
-#    element, attribute, text, namespace, processing-instruction, comment, document
-#
-# Element-like objects are used for representing elements and comments, ElementTree-like objects
-# for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
-###
-def is_element_node(obj, tag=None):
-    """
-    Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
-    Raises a ValueError if the argument tag has to be used but it's in a wrong format.
-
-    :param obj: the node to be tested.
-    :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
-    are '*', '*:*', '*:local-name' and '{namespace}*'.
-    """
-    if not is_etree_element(obj) or callable(obj.tag):
-        return False
-    elif tag is None:
-        return True
-    elif not obj.tag:
-        return obj.tag == tag
-    elif tag == '*' or tag == '*:*':
-        return obj.tag != ''
-    elif tag[0] == '*':
-        try:
-            _, name = tag.split(':')
-        except (ValueError, IndexError):
-            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
-        else:
-            return obj.tag.split('}')[1] == name if obj.tag[0] == '{' else obj.tag == name
-    elif tag[-1] == '*':
-        if tag[0] != '{' or '}' not in tag:
-            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
-        return obj.tag.split('}')[0][1:] == tag.split('}')[0][1:] if obj.tag[0] == '{' else False
-    else:
-        return obj.tag == tag
-
-
-def is_attribute_node(obj, name=None):
-    """
-    Returns `True` if the first argument is an attribute node matching the name, `False` otherwise.
-    Raises a ValueError if the argument name has to be used but it's in a wrong format.
-
-    :param obj: the node to be tested.
-    :param name: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
-    are '*', '*:*', '*:local-name' and '{namespace}*'.
-    """
-    if name is None or name == '*' or name == '*:*':
-        return isinstance(obj, AttributeNode)
-    elif not isinstance(obj, AttributeNode):
-        return False
-    elif name[0] == '*':
-        try:
-            _, _name = name.split(':')
-        except (ValueError, IndexError):
-            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
-        else:
-            return obj[0].split('}')[1] == _name if obj[0][0] == '{' else obj[0] == _name
-    elif name[-1] == '*':
-        if name[0] != '{' or '}' not in name:
-            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
-        return obj[0].split('}')[0][1:] == name.split('}')[0][1:] if obj[0][0] == '{' else False
-    else:
-        return obj[0] == name
-
-
-def is_comment_node(obj):
-    return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'Comment'
-
-
-def is_processing_instruction_node(obj):
-    return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'ProcessingInstruction'
-
-
-def is_document_node(obj):
-    return all(hasattr(obj, name) for name in ('getroot', 'iter', 'iterfind', 'parse'))
-
-
-def is_namespace_node(obj):
-    return isinstance(obj, NamespaceNode)
-
-
-if not PY3:
-    def is_text_node(obj):
-        return isinstance(obj, (str, unicode))
-else:
-    def is_text_node(obj):
-        return isinstance(obj, str)
-
-
-def is_xpath_node(obj):
-    return isinstance(obj, tuple) or is_etree_element(obj) or is_document_node(obj) or is_text_node(obj)
-
-
-###
-# Node accessors: in this implementation node accessors return None instead of empty sequence.
-# Ref: https://www.w3.org/TR/xpath-datamodel-31/#dm-document-uri
-def node_attributes(obj):
-    if is_element_node(obj):
-        return obj.attrib
-
-
-def node_base_uri(obj):
-    try:
-        if is_element_node(obj):
-            return obj.attrib[XML_BASE]
-        elif is_document_node(obj):
-            return obj.getroot().attrib[XML_BASE]
-    except KeyError:
-        pass
-
-
-def node_document_uri(obj):
-    if is_document_node(obj):
-        try:
-            uri = obj.getroot().attrib[XML_BASE]
-            parts = urlparse(uri)
-        except (KeyError, ValueError):
-            pass
-        else:
-            if parts.scheme and parts.netloc or parts.path.startswith('/'):
-                return uri
-
-
-def node_children(obj):
-    if is_element_node(obj):
-        return (child for child in obj)
-    elif is_document_node(obj):
-        return (child for child in [obj.getroot()])
-
-
-def node_is_id(obj):
-    if is_element_node(obj):
-        return ncname_validator(obj.text)
-    elif is_attribute_node(obj):
-        return ncname_validator(obj[1])
-
-
-def node_is_idrefs(obj):
-    if is_element_node(obj):
-        return obj.text is not None and all(ncname_validator(x) for x in obj.text.split())
-    elif is_attribute_node(obj):
-        return all(ncname_validator(x) for x in obj[1].split())
-
-
-def node_nilled(obj):
-    if is_element_node(obj):
-        return obj.get(XSI_NIL) in ('true', '1')
-
-
-def node_kind(obj):
-    if is_element_node(obj):
-        return 'element'
-    elif is_attribute_node(obj):
-        return 'attribute'
-    elif is_text_node(obj):
-        return 'text'
-    elif is_document_node(obj):
-        return 'document'
-    elif is_namespace_node(obj):
-        return 'namespace'
-    elif is_comment_node(obj):
-        return 'comment'
-    elif is_processing_instruction_node(obj):
-        return 'processing-instruction'
-
-
-def node_name(obj):
-    if is_element_node(obj):
-        return obj.tag
-    elif is_attribute_node(obj) or is_namespace_node(obj):
-        return obj[0]
-
-
-def node_string_value(obj):
-    if is_element_node(obj):
-        return u''.join(elem_iter_strings(obj))
-    elif is_attribute_node(obj):
-        return obj[1]
-    elif is_text_node(obj):
-        return obj
-    elif is_document_node(obj):
-        return u''.join(e.text for e in obj.getroot().iter() if e.text is not None)
-    elif is_namespace_node(obj):
-        return obj[1]
-    elif is_comment_node(obj):
-        return obj.text
-    elif is_processing_instruction_node(obj):
-        return obj.text
-
-
-def node_type_name(obj, schema=None):
-    if is_element_node(obj):
-        if schema is not None:
-            xsd_element = schema.get_element(obj.tag)
-            if xsd_element is not None:
-                return xsd_element.type.name
-        return XSD_UNTYPED
-
-    elif is_attribute_node(obj):
-        if schema is not None:
-            xsd_attribute = schema.get_attribute(obj[0])
-            if xsd_attribute is not None:
-                return xsd_attribute.type.name
-        return XSD_UNTYPED_ATOMIC
-
-    elif is_text_node(obj):
-        return XSD_UNTYPED_ATOMIC
-
-
-###
-# XPath base functions
 def boolean_value(obj, token=None):
    """
    The effective boolean value, as computed by fn:boolean().
+    Moved to token class but kept for backward compatibility.
    """
    if isinstance(obj, list):
        if not obj:
@ -280,38 +36,3 @@ def boolean_value(obj, token=None):
            message="Effective boolean value is not defined for {}.".format(obj)
        )
    return bool(obj)
-
-
-def string_value(obj):
-    """
-    The string value, as computed by fn:string().
-    """
-    if obj is None:
-        return ''
-    elif is_xpath_node(obj):
-        return node_string_value(obj)
-    else:
-        return str(obj)
-
-
-def data_value(obj):
-    """
-    The typed value, as computed by fn:data() on each item. Returns an instance of
-    UntypedAtomic.
-    """
-    if obj is None:
-        return
-    elif not is_xpath_node(obj):
-        return obj
-    else:
-        return UntypedAtomic(node_string_value(obj))
-
-
-def number_value(obj):
-    """
-    The numeric value, as computed by fn:number() on each item. Returns a float value.
-    """
-    try:
-        return float(node_string_value(obj) if is_xpath_node(obj) else obj)
-    except (TypeError, ValueError):
-        return float('nan')
--- a/elementpath/xpath_nodes.py
+++ b/elementpath/xpath_nodes.py
@ -0,0 +1,264 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+"""
+Helper functions for XPath nodes and basic data types.
+"""
+from collections import namedtuple
+
+from .compat import PY3, urlparse
+from .namespaces import XML_BASE, XSI_NIL
+from .exceptions import ElementPathValueError, xpath_error
+from .datatypes import ncname_validator
+
+###
+# Node types
+AttributeNode = namedtuple('Attribute', 'name value')
+"""A namedtuple-based type to represent XPath attributes."""
+
+NamespaceNode = namedtuple('Namespace', 'prefix uri')
+"""A namedtuple-based type to represent XPath namespaces."""
+
+
+###
+# Utility functions for ElementTree's Element instances
+def is_etree_element(obj):
+    return hasattr(obj, 'tag') and hasattr(obj, 'attrib') and hasattr(obj, 'text')
+
+
+def elem_iter_strings(elem):
+    for e in elem.iter():
+        if e.text is not None:
+            yield e.text
+        if e.tail is not None and e is not elem:
+            yield e.tail
+
+
+###
+# XPath node test functions
+#
+# XPath has 7 kinds of nodes:
+#
+#    element, attribute, text, namespace, processing-instruction, comment, document
+#
+# Element-like objects are used for representing elements and comments, ElementTree-like objects
+# for documents. Generic tuples are used for representing attributes and named-tuples for namespaces.
+###
+def is_element_node(obj, tag=None):
+    """
+    Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
+    Raises a ValueError if the argument tag has to be used but it's in a wrong format.
+
+    :param obj: the node to be tested.
+    :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
+    are '*', '*:*', '*:local-name' and '{namespace}*'.
+    """
+    if not is_etree_element(obj) or callable(obj.tag):
+        return False
+    elif tag is None:
+        return True
+    elif not obj.tag:
+        return obj.tag == tag
+    elif tag == '*' or tag == '*:*':
+        return obj.tag != ''
+    elif tag[0] == '*':
+        try:
+            _, name = tag.split(':')
+        except (ValueError, IndexError):
+            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
+        else:
+            return obj.tag.split('}')[1] == name if obj.tag[0] == '{' else obj.tag == name
+    elif tag[-1] == '*':
+        if tag[0] != '{' or '}' not in tag:
+            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
+        return obj.tag.split('}')[0][1:] == tag.split('}')[0][1:] if obj.tag[0] == '{' else False
+    else:
+        return obj.tag == tag
+
+
+def is_attribute_node(obj, name=None):
+    """
+    Returns `True` if the first argument is an attribute node matching the name, `False` otherwise.
+    Raises a ValueError if the argument name has to be used but it's in a wrong format.
+
+    :param obj: the node to be tested.
+    :param name: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
+    are '*', '*:*', '*:local-name' and '{namespace}*'.
+    """
+    if name is None or name == '*' or name == '*:*':
+        return isinstance(obj, AttributeNode)
+    elif not isinstance(obj, AttributeNode):
+        return False
+    elif name[0] == '*':
+        try:
+            _, _name = name.split(':')
+        except (ValueError, IndexError):
+            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
+        else:
+            return obj[0].split('}')[1] == _name if obj[0][0] == '{' else obj[0] == _name
+    elif name[-1] == '*':
+        if name[0] != '{' or '}' not in name:
+            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
+        return obj[0].split('}')[0][1:] == name.split('}')[0][1:] if obj[0][0] == '{' else False
+    else:
+        return obj[0] == name
+
+
+def is_comment_node(obj):
+    return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'Comment'
+
+
+def is_processing_instruction_node(obj):
+    return is_etree_element(obj) and callable(obj.tag) and obj.tag.__name__ == 'ProcessingInstruction'
+
+
+def is_document_node(obj):
+    return all(hasattr(obj, name) for name in ('getroot', 'iter', 'iterfind', 'parse'))
+
+
+def is_namespace_node(obj):
+    return isinstance(obj, NamespaceNode)
+
+
+if not PY3:
+    def is_text_node(obj):
+        return isinstance(obj, (str, unicode))
+else:
+    def is_text_node(obj):
+        return isinstance(obj, str)
+
+
+def is_xpath_node(obj):
+    return isinstance(obj, tuple) or is_etree_element(obj) or is_document_node(obj) or is_text_node(obj)
+
+
+###
+# Node accessors: in this implementation node accessors return None instead of empty sequence.
+# Ref: https://www.w3.org/TR/xpath-datamodel-31/#dm-document-uri
+def node_attributes(obj):
+    if is_element_node(obj):
+        return obj.attrib
+
+
+def node_base_uri(obj):
+    try:
+        if is_element_node(obj):
+            return obj.attrib[XML_BASE]
+        elif is_document_node(obj):
+            return obj.getroot().attrib[XML_BASE]
+    except KeyError:
+        pass
+
+
+def node_document_uri(obj):
+    if is_document_node(obj):
+        try:
+            uri = obj.getroot().attrib[XML_BASE]
+            parts = urlparse(uri)
+        except (KeyError, ValueError):
+            pass
+        else:
+            if parts.scheme and parts.netloc or parts.path.startswith('/'):
+                return uri
+
+
+def node_children(obj):
+    if is_element_node(obj):
+        return (child for child in obj)
+    elif is_document_node(obj):
+        return (child for child in [obj.getroot()])
+
+
+def node_is_id(obj):
+    if is_element_node(obj):
+        return ncname_validator(obj.text)
+    elif is_attribute_node(obj):
+        return ncname_validator(obj[1])
+
+
+def node_is_idrefs(obj):
+    if is_element_node(obj):
+        return obj.text is not None and all(ncname_validator(x) for x in obj.text.split())
+    elif is_attribute_node(obj):
+        return all(ncname_validator(x) for x in obj[1].split())
+
+
+def node_nilled(obj):
+    if is_element_node(obj):
+        return obj.get(XSI_NIL) in ('true', '1')
+
+
+def node_kind(obj):
+    if is_element_node(obj):
+        return 'element'
+    elif is_attribute_node(obj):
+        return 'attribute'
+    elif is_text_node(obj):
+        return 'text'
+    elif is_document_node(obj):
+        return 'document'
+    elif is_namespace_node(obj):
+        return 'namespace'
+    elif is_comment_node(obj):
+        return 'comment'
+    elif is_processing_instruction_node(obj):
+        return 'processing-instruction'
+
+
+def node_name(obj):
+    if is_element_node(obj):
+        return obj.tag
+    elif is_attribute_node(obj) or is_namespace_node(obj):
+        return obj[0]
+
+
+def node_string_value(obj):
+    if is_element_node(obj):
+        return u''.join(elem_iter_strings(obj))
+    elif is_attribute_node(obj):
+        return obj[1]
+    elif is_text_node(obj):
+        return obj
+    elif is_document_node(obj):
+        return u''.join(e.text for e in obj.getroot().iter() if e.text is not None)
+    elif is_namespace_node(obj):
+        return obj[1]
+    elif is_comment_node(obj):
+        return obj.text
+    elif is_processing_instruction_node(obj):
+        return obj.text
+
+
+###
+# XPath base functions
+def boolean_value(obj, token=None):
+    """
+    The effective boolean value, as computed by fn:boolean().
+    Moved to token class but kept for backward compatibility.
+    """
+    if isinstance(obj, list):
+        if not obj:
+            return False
+        elif isinstance(obj[0], tuple) or is_element_node(obj[0]):
+            return True
+        elif len(obj) == 1:
+            return bool(obj[0])
+        else:
+            raise xpath_error(
+                code='FORG0006', token=token, prefix=getattr(token, 'error_prefix', 'err'),
+                message="Effective boolean value is not defined for a sequence of two or "
+                "more items not starting with an XPath node.",
+            )
+    elif isinstance(obj, tuple) or is_element_node(obj):
+        raise xpath_error(
+            code='FORG0006', token=token, prefix=getattr(token, 'error_prefix', 'err'),
+            message="Effective boolean value is not defined for {}.".format(obj)
+        )
+    return bool(obj)
--- a/elementpath/xpath_token.py
+++ b/elementpath/xpath_token.py
@ -25,9 +25,9 @@ import contextlib
 from .compat import string_base_type
 from .exceptions import xpath_error
 from .namespaces import XQT_ERRORS_NAMESPACE
-from .xpath_helpers import AttributeNode, is_etree_element, is_document_node, boolean_value, \
-    string_value, data_value, number_value
-from .datatypes import UntypedAtomic, Timezone, DayTimeDuration
+from .xpath_nodes import AttributeNode, is_etree_element, \
+    is_element_node, is_document_node, is_xpath_node, node_string_value
+from .datatypes import UntypedAtomic, Timezone, DayTimeDuration, XSD_BUILTIN_TYPES
 from .tdop_parser import Token


@ -167,12 +167,12 @@ class XPathToken(Token):
        if cls is not None and not isinstance(item, cls):
            if self.parser.compatibility_mode:
                if issubclass(cls, string_base_type):
-                    return string_value(item)
+                    return self.string_value(item)
                elif issubclass(cls, float):
-                    return number_value(item)
+                    return self.number_value(item)

            if self.parser.version > '1.0':
-                value = data_value(item)
+                value = self.data_value(item)
                if isinstance(value, UntypedAtomic):
                    try:
                        return str(value) if issubclass(cls, string_base_type) else cls(value)
@ -193,7 +193,7 @@ class XPathToken(Token):
        :param context: the XPath context.
        """
        for item in self.select(context):
-            value = data_value(item)
+            value = self.data_value(item)
            if value is None:
                raise self.error('FOTY0012', "argument node does not have a typed value: {}".format(item))
            else:
@ -240,21 +240,22 @@ class XPathToken(Token):
            try:
                if isinstance(operand1[0], bool):
                    if len(operand1) == 1:
-                        return [(operand1[0], boolean_value(operand2, self))]
+                        return [(operand1[0], self.boolean_value(operand2))]
                if isinstance(operand2[0], bool):
                    if len(operand2) == 1:
-                        return [(boolean_value(operand1, self), operand2[0])]
+                        return [(self.boolean_value(operand1), operand2[0])]
            except IndexError:
                return []

            # Converts to float for lesser-greater operators (3.)
            if self.symbol in ('<', '<=', '>', '>='):
                return [
-                    (float(data_value(value1)), float(data_value(value2)))
+                    (float(self.data_value(value1)), float(self.data_value(value2)))
                    for value1 in operand1 for value2 in operand2
                ]

-        return [(data_value(value1), data_value(value2)) for value1 in operand1 for value2 in operand2]
+        return [(self.data_value(value1), self.data_value(value2))
+                for value1 in operand1 for value2 in operand2]

    def get_results(self, context):
        """
@ -355,6 +356,68 @@ class XPathToken(Token):
        finally:
            locale.setlocale(locale.LC_ALL, default_locale)

+    ###
+    # XPath data conversion base functions
+    def data_value(self, obj):
+        """
+        The typed value, as computed by fn:data() on each item. Returns an UntypedAtomic
+        instance for nodes without a type; non-node items are returned unchanged.
+        """
+        if obj is None:
+            return
+        elif not is_xpath_node(obj):
+            return obj
+        elif not hasattr(obj, 'type'):
+            return UntypedAtomic(node_string_value(obj))
+        elif obj.type.is_simple():
+            # In case of schema element or attribute use the sample value
+            # of the primitive type
+            primitive_type = self.parser.schema.get_primitive_type(obj.type)
+            return XSD_BUILTIN_TYPES[primitive_type.local_name].value
+
+    def boolean_value(self, obj):
+        """
+        The effective boolean value, as computed by fn:boolean().
+        """
+        if isinstance(obj, list):
+            if not obj:
+                return False
+            elif isinstance(obj[0], tuple) or is_element_node(obj[0]):
+                return True
+            elif len(obj) == 1:
+                return bool(obj[0])
+            else:
+                raise self.error(
+                    code='FORG0006',
+                    message="Effective boolean value is not defined for a sequence of two or "
+                            "more items not starting with an XPath node.",
+                )
+        elif isinstance(obj, tuple) or is_element_node(obj):
+            raise self.error('FORG0006', "Effective boolean value is not defined for {}.".format(obj))
+        return bool(obj)
+
+    @staticmethod
+    def string_value(obj):
+        """
+        The string value, as computed by fn:string().
+        """
+        if obj is None:
+            return ''
+        elif is_xpath_node(obj):
+            return node_string_value(obj)
+        else:
+            return str(obj)
+
+    @staticmethod
+    def number_value(obj):
+        """
+        The numeric value, as computed by fn:number() on each item. Returns a float value.
+        """
+        try:
+            return float(node_string_value(obj) if is_xpath_node(obj) else obj)
+        except (TypeError, ValueError):
+            return float('nan')
+
    ###
    # Error handling helpers
    def error(self, code, message=None):
--- a/setup.py
+++ b/setup.py
@ -15,7 +15,7 @@ with open("README.rst") as readme:

 setup(
    name='elementpath',
-    version='1.1.9',
+    version='1.2.0',
    packages=['elementpath'],
    author='Davide Brunato',
    author_email='brunato@sissa.it',
--- a/tests/test_elementpath.py
+++ b/tests/test_elementpath.py
@ -24,7 +24,7 @@ import unittest

 if __name__ == '__main__':
    try:
-        from tests.test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, XPathHelpersTest
+        from tests.test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, NodeHelpersTest
        from tests.test_datatypes import UntypedAtomicTest, DateTimeTypesTest, DurationTypesTest, TimezoneTypeTest
        from tests.test_context import XPathContextTest
        from tests.test_xpath1_parser import XPath1ParserTest, LxmlXPath1ParserTest
@ -34,7 +34,7 @@ if __name__ == '__main__':
        from tests.test_package import PackageTest
    except ImportError:
        # Python 2 fallback
-        from test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, XPathHelpersTest
+        from test_helpers import ExceptionHelpersTest, NamespaceHelpersTest, NodeHelpersTest
        from test_datatypes import UntypedAtomicTest, DateTimeTypesTest, DurationTypesTest, TimezoneTypeTest
        from test_context import XPathContextTest
        from test_xpath1_parser import XPath1ParserTest, LxmlXPath1ParserTest
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@ -12,17 +12,16 @@
 from __future__ import unicode_literals
 import unittest
 import io
-import math
 import xml.etree.ElementTree as ElementTree

-from xmlschema import XMLSchema
 from elementpath.exceptions import ElementPathError, xpath_error
-from elementpath.schema_proxy import XMLSchemaProxy
-from elementpath.namespaces import XSD_NAMESPACE, get_namespace, qname_to_prefixed, prefixed_to_qname
-from elementpath.xpath_helpers import AttributeNode, NamespaceNode, is_etree_element, is_element_node, \
-    is_attribute_node, is_comment_node, is_document_node, is_namespace_node, is_processing_instruction_node, \
-    is_text_node, node_attributes, node_base_uri, node_document_uri, node_children, node_is_id, node_is_idrefs, \
-    node_nilled, node_kind, node_name, node_string_value, node_type_name, boolean_value, data_value, number_value
+from elementpath.namespaces import XSD_NAMESPACE, get_namespace, qname_to_prefixed, \
+    prefixed_to_qname
+from elementpath.xpath_nodes import AttributeNode, NamespaceNode, is_etree_element, \
+    is_element_node, is_attribute_node, is_comment_node, is_document_node, \
+    is_namespace_node, is_processing_instruction_node, is_text_node, node_attributes, \
+    node_base_uri, node_document_uri, node_children, node_is_id, node_is_idrefs, \
+    node_nilled, node_kind, node_name, node_string_value
 from elementpath.xpath1_parser import XPath1Parser


@ -78,7 +77,7 @@ class NamespaceHelpersTest(unittest.TestCase):
            prefixed_to_qname('foo:', {'': 'ns'})


-class XPathHelpersTest(unittest.TestCase):
+class NodeHelpersTest(unittest.TestCase):
    elem = ElementTree.XML('<node a1="10"/>')

    def test_is_etree_element_function(self):
@ -245,40 +244,6 @@ class XPathHelpersTest(unittest.TestCase):
        self.assertIsNone(node_string_value(None))
        self.assertIsNone(node_string_value(10))

-    def test_node_type_name_function(self):
-        schema = XMLSchemaProxy(
-            XMLSchema("""<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
-                <xs:attribute name="slot" type="xs:token" />
-                <xs:element name="frame" type="xs:decimal" />
-            </xs:schema>""")
-        )
-        elem = ElementTree.Element('frame')
-        self.assertEqual(node_type_name(elem, schema), '{http://www.w3.org/2001/XMLSchema}decimal')
-        self.assertEqual(node_type_name(elem), '{http://www.w3.org/2001/XMLSchema}untyped')
-        elem = ElementTree.Element('alpha')
-        self.assertEqual(node_type_name(elem, schema), '{http://www.w3.org/2001/XMLSchema}untyped')
-
-        attr = AttributeNode('slot', 'x1')
-        self.assertEqual(node_type_name(attr, schema), '{http://www.w3.org/2001/XMLSchema}token')
-        self.assertEqual(node_type_name(attr), '{http://www.w3.org/2001/XMLSchema}untypedAtomic')
-        attr = AttributeNode('alpha', 'x1')
-        self.assertEqual(node_type_name(attr, schema), '{http://www.w3.org/2001/XMLSchema}untypedAtomic')
-
-        self.assertEqual(node_type_name('slot'), '{http://www.w3.org/2001/XMLSchema}untypedAtomic')
-        self.assertIsNone(node_type_name(10))
-
-    def test_boolean_value_function(self):
-        elem = ElementTree.Element('A')
-        with self.assertRaises(TypeError):
-            boolean_value(elem)
-
-    def test_data_value_function(self):
-        self.assertIsNone(data_value(None))
-
-    def test_number_value_function(self):
-        self.assertEqual(number_value("19"), 19)
-        self.assertTrue(math.isnan(number_value("not a number")))
-

 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_xpath1_parser.py
+++ b/tests/test_xpath1_parser.py
@ -208,6 +208,21 @@ class XPath1ParserTest(unittest.TestCase):
            else:
                self.assertTrue(expected(results))

+    def test_boolean_value_function(self):
+        token = self.parser.parse('true()')
+        elem = ElementTree.Element('A')
+        with self.assertRaises(TypeError):
+            token.boolean_value(elem)
+
+    def test_data_value_function(self):
+        token = self.parser.parse('true()')
+        self.assertIsNone(token.data_value(None))
+
+    def test_number_value_function(self):
+        token = self.parser.parse('true()')
+        self.assertEqual(token.number_value("19"), 19)
+        self.assertTrue(math.isnan(token.number_value("not a number")))
+
    # Wrong XPath expression checker shortcuts
    def wrong_syntax(self, path):
        self.assertRaises(SyntaxError, self.parser.parse, path)