debian-elementpath/elementpath/xpath_nodes.py

225 lines
6.9 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
"""
Helper functions for XPath nodes and basic data types.
"""
from collections import namedtuple
from .compat import PY3, urlparse
from .namespaces import XML_BASE, XSI_NIL
from .exceptions import ElementPathValueError
from .datatypes import ncname_validator
###
# Node types
# Attribute nodes are (name, value) pairs.
AttributeNode = namedtuple('Attribute', ['name', 'value'])
"""A namedtuple-based type to represent XPath attributes."""

# Namespace nodes are (prefix, uri) pairs.
NamespaceNode = namedtuple('Namespace', ['prefix', 'uri'])
"""A namedtuple-based type to represent XPath namespaces."""
###
# Utility functions for ElementTree's Element instances
def is_etree_element(obj):
    """
    Checks if the argument exposes the basic interface of an ElementTree element.

    :param obj: the object to be tested.
    :return: `True` if the object has all the attributes `tag`, `attrib` and `text`, \
    `False` otherwise.
    """
    return all(hasattr(obj, attr) for attr in ('tag', 'attrib', 'text'))
def elem_iter_strings(elem):
    """
    Generates the text chunks contained in an element, in document order.

    For each element of the subtree its text is yielded first, then the strings of
    its children, then its tail. The tail is emitted only after the whole subtree of
    the element has been visited, so that joining the yielded chunks gives the
    content in the same order it has in the document. The tail of the argument
    itself is not part of its content and is never yielded.

    :param elem: an ElementTree-like element. An ElementTree-like document (an object \
    with a `getroot` method) is also accepted: in this case the strings of the root \
    element are generated, including its tail if it is not `None`.
    :return: yields the strings that are not `None`, empty strings included.
    """
    if hasattr(elem, 'getroot'):
        # A document wrapper has no text and is not iterable: its only child is the root.
        children = iter([elem.getroot()])
    else:
        if elem.text is not None:
            yield elem.text
        children = iter(elem)

    # Explicit stack instead of recursion, so deep trees don't hit the recursion limit.
    # Each entry couples an element with the iterator over its not yet visited children.
    stack = [(elem, children)]
    while stack:
        current, pending = stack[-1]
        child = next(pending, None)
        if child is None:
            # Subtree completed: only now the tail of the element can be emitted.
            stack.pop()
            if current is not elem and current.tail is not None:
                yield current.tail
        else:
            if child.text is not None:
                yield child.text
            stack.append((child, iter(child)))
###
# XPath node test functions
#
# XPath has 7 kinds of nodes:
#
# element, attribute, text, namespace, processing-instruction, comment, document
#
# Element-like objects are used for representing elements, comments and processing
# instructions, ElementTree-like objects for documents. Named-tuples are used for
# representing attributes and namespaces, and strings for text nodes.
###
def is_element_node(obj, tag=None):
    """
    Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
    Raises an ElementPathValueError if the argument tag has to be used but it's in a wrong format.

    Element-like objects with a callable tag (the form tested by the comment and
    processing instruction checks) are never considered element nodes.

    :param obj: the node to be tested.
    :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard \
    formats are '*', '*:*', '*:local-name' and '{namespace}*'. If it's `None` any element \
    node matches. An empty string matches only an element with an empty tag.
    :return: a boolean value.
    """
    if not is_etree_element(obj) or callable(obj.tag):
        return False
    elif tag is None:
        return True
    elif not obj.tag or not tag:
        # An empty name can be matched only literally: the wildcard checks below read
        # the first/last character of both names, so they need non-empty strings.
        return obj.tag == tag
    elif tag == '*' or tag == '*:*':
        return obj.tag != ''
    elif tag[0] == '*':
        # Wildcard on the namespace part ('*:local-name'): compare local names only.
        try:
            _, name = tag.split(':')
        except (ValueError, IndexError):
            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
        else:
            if obj.tag[0] == '{':
                return obj.tag.split('}')[1] == name
            return obj.tag == name
    elif tag[-1] == '*':
        # Wildcard on the local part ('{namespace}*'): compare namespaces only.
        if tag[0] != '{' or '}' not in tag:
            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
        if obj.tag[0] != '{':
            return False
        return obj.tag.split('}')[0][1:] == tag.split('}')[0][1:]
    else:
        return obj.tag == tag
def is_attribute_node(obj, name=None):
    """
    Returns `True` if the first argument is an attribute node matching the name, `False` otherwise.
    Raises an ElementPathValueError if the argument name has to be used but it's in a wrong format.

    :param obj: the node to be tested.
    :param name: a fully qualified name, a local name or a wildcard. The accepted wildcard \
    formats are '*', '*:*', '*:local-name' and '{namespace}*'. If it's `None` any attribute \
    node matches. An empty string matches only an attribute with an empty name.
    :return: a boolean value.
    """
    if name is None or name == '*' or name == '*:*':
        return isinstance(obj, AttributeNode)
    elif not isinstance(obj, AttributeNode):
        return False
    elif not name or not obj[0]:
        # An empty name can be matched only literally: the wildcard checks below read
        # the first/last character of both names, so they need non-empty strings.
        return obj[0] == name
    elif name[0] == '*':
        # Wildcard on the namespace part ('*:local-name'): compare local names only.
        try:
            _, _name = name.split(':')
        except (ValueError, IndexError):
            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
        else:
            if obj[0][0] == '{':
                return obj[0].split('}')[1] == _name
            return obj[0] == _name
    elif name[-1] == '*':
        # Wildcard on the local part ('{namespace}*'): compare namespaces only.
        if name[0] != '{' or '}' not in name:
            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
        if obj[0][0] != '{':
            return False
        return obj[0].split('}')[0][1:] == name.split('}')[0][1:]
    else:
        return obj[0] == name
def is_schema_node(obj):
    """
    Checks if the argument exposes the attributes expected from a schema node.

    :param obj: the object to be tested.
    :return: `True` if the object has all the attributes `name`, `local_name` and `type`, \
    `False` otherwise.
    """
    return all(hasattr(obj, attr) for attr in ('name', 'local_name', 'type'))
def is_comment_node(obj):
    """
    Checks if the argument is a comment node, that is an element-like object whose
    tag is a callable named 'Comment'.

    :param obj: the object to be tested.
    :return: a boolean value.
    """
    if not is_etree_element(obj):
        return False
    return callable(obj.tag) and obj.tag.__name__ == 'Comment'
def is_processing_instruction_node(obj):
    """
    Checks if the argument is a processing instruction node, that is an element-like
    object whose tag is a callable named 'ProcessingInstruction'.

    :param obj: the object to be tested.
    :return: a boolean value.
    """
    if not is_etree_element(obj):
        return False
    return callable(obj.tag) and obj.tag.__name__ == 'ProcessingInstruction'
def is_document_node(obj):
    """
    Checks if the argument exposes the basic interface of an ElementTree document.

    :param obj: the object to be tested.
    :return: `True` if the object has all the attributes `getroot`, `iter`, `iterfind` \
    and `parse`, `False` otherwise.
    """
    for method_name in ('getroot', 'iter', 'iterfind', 'parse'):
        if not hasattr(obj, method_name):
            return False
    return True
def is_namespace_node(obj):
    """
    Checks if the argument is a namespace node.

    :param obj: the object to be tested.
    :return: `True` if the object is an instance of `NamespaceNode`, `False` otherwise.
    """
    return isinstance(obj, NamespaceNode)
if PY3:
    def is_text_node(obj):
        """Returns `True` if the argument is a string, that represents a text node."""
        return isinstance(obj, str)
else:
    def is_text_node(obj):
        """Returns `True` if the argument is a byte or unicode string (Python 2)."""
        return isinstance(obj, (str, unicode))
def is_xpath_node(obj):
    """
    Checks if the argument can represent an XPath node.

    Any tuple is accepted (attribute and namespace nodes are namedtuples), together with
    element-like objects, document-like objects, text nodes and schema nodes.

    :param obj: the object to be tested.
    :return: a boolean value.
    """
    if isinstance(obj, tuple):
        return True
    node_tests = (is_etree_element, is_document_node, is_text_node, is_schema_node)
    return any(node_test(obj) for node_test in node_tests)
###
# Node accessors: in this implementation node accessors return None instead of empty sequence.
# Ref: https://www.w3.org/TR/xpath-datamodel-31/#dm-document-uri
def node_attributes(obj):
    """
    Accessor for the attributes of a node.

    :param obj: the node.
    :return: the `attrib` of the node if it's an element node, `None` otherwise.
    """
    if not is_element_node(obj):
        return None
    return obj.attrib
def node_base_uri(obj):
    """
    Accessor for the base URI of a node.

    The value is read from the attribute named by the `XML_BASE` constant, taken from
    the node itself for an element node or from the root element for a document node.

    :param obj: the node.
    :return: the value of the attribute or `None` if the attribute is missing, if the \
    document has no root element or if the argument is not an element or a document node.
    """
    if is_element_node(obj):
        elem = obj
    elif is_document_node(obj):
        # A document built without a root element returns None from getroot()
        elem = obj.getroot()
        if elem is None:
            return None
    else:
        return None

    try:
        return elem.attrib[XML_BASE]
    except KeyError:
        return None
def node_document_uri(obj):
    """
    Accessor for the document URI of a node.

    The URI is read from the attribute named by the `XML_BASE` constant of the root
    element and it's returned only if it has both a scheme and a network location
    or if its path starts with a slash.

    :param obj: the node.
    :return: the URI or `None` if the argument is not a document node, if the document \
    has no root element, or if the attribute is missing, unparsable or not accepted.
    """
    if not is_document_node(obj):
        return None

    # A document built without a root element returns None from getroot()
    root = obj.getroot()
    if root is None:
        return None

    try:
        uri = root.attrib[XML_BASE]
        parts = urlparse(uri)
    except (KeyError, ValueError):
        return None

    if parts.scheme and parts.netloc or parts.path.startswith('/'):
        return uri
    return None
def node_children(obj):
    """
    Accessor for the children of a node.

    :param obj: the node.
    :return: a generator of the child elements for an element node, a generator of the \
    root element for a document node (empty if the document has no root element), `None` \
    for other objects.
    """
    if is_element_node(obj):
        return (child for child in obj)
    elif is_document_node(obj):
        # getroot() returns None for a document without a root element: in this case
        # the document has no children, so don't expose None as a child node.
        return (child for child in [obj.getroot()] if child is not None)
    return None
def node_is_id(obj):
    """
    Accessor that tells if the content of a node is a valid ID.

    The text of an element node or the value of an attribute node is passed
    as-is to `ncname_validator`.

    :param obj: the node.
    :return: the result of the validator for element and attribute nodes, `None` for \
    other objects.
    """
    if is_element_node(obj):
        candidate = obj.text
    elif is_attribute_node(obj):
        candidate = obj[1]
    else:
        return None
    return ncname_validator(candidate)
def node_is_idrefs(obj):
    """
    Accessor that tells if the content of a node is a whitespace-separated list of
    names, each one accepted by `ncname_validator`.

    :param obj: the node.
    :return: a boolean value for element and attribute nodes (`False` for an element \
    whose text is `None`), `None` for other objects.
    """
    if is_element_node(obj):
        content = obj.text
        if content is None:
            return False
    elif is_attribute_node(obj):
        content = obj[1]
    else:
        return None
    return all(ncname_validator(token) for token in content.split())
def node_nilled(obj):
    """
    Accessor for the nilled property of a node.

    :param obj: the node.
    :return: `True` if the node is an element whose attribute named by the `XSI_NIL` \
    constant is 'true' or '1', `False` for other elements, `None` for other objects.
    """
    if not is_element_node(obj):
        return None
    nil_value = obj.get(XSI_NIL)
    return nil_value in ('true', '1')
def node_kind(obj):
    """
    Accessor for the kind of a node.

    :param obj: the node.
    :return: one of the strings 'element', 'attribute', 'text', 'document', 'namespace', \
    'comment' and 'processing-instruction', or `None` if the argument doesn't match any \
    node test.
    """
    # The tests are applied in this order and the first one that matches wins.
    kind_tests = (
        (is_element_node, 'element'),
        (is_attribute_node, 'attribute'),
        (is_text_node, 'text'),
        (is_document_node, 'document'),
        (is_namespace_node, 'namespace'),
        (is_comment_node, 'comment'),
        (is_processing_instruction_node, 'processing-instruction'),
    )
    for node_test, kind in kind_tests:
        if node_test(obj):
            return kind
    return None
def node_name(obj):
    """
    Accessor for the name of a node.

    :param obj: the node.
    :return: the tag for an element node, the first item of the tuple for attribute and \
    namespace nodes (the attribute name or the namespace prefix), `None` for other objects.
    """
    if is_element_node(obj):
        return obj.tag
    if is_attribute_node(obj) or is_namespace_node(obj):
        return obj[0]
    return None