225 lines
6.9 KiB
Python
225 lines
6.9 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies).
|
|
# All rights reserved.
|
|
# This file is distributed under the terms of the MIT License.
|
|
# See the file 'LICENSE' in the root directory of the present
|
|
# distribution, or http://opensource.org/licenses/MIT.
|
|
#
|
|
# @author Davide Brunato <brunato@sissa.it>
|
|
#
|
|
"""
|
|
Helper functions for XPath nodes and basic data types.
|
|
"""
|
|
from collections import namedtuple
|
|
|
|
from .compat import PY3, urlparse
|
|
from .namespaces import XML_BASE, XSI_NIL
|
|
from .exceptions import ElementPathValueError
|
|
from .datatypes import ncname_validator
|
|
|
|
###
|
|
# Node types
|
|
# Attribute nodes are (name, value) pairs.
AttributeNode = namedtuple('Attribute', ['name', 'value'])
"""A namedtuple-based type to represent XPath attributes."""

# Namespace nodes are (prefix, uri) pairs.
NamespaceNode = namedtuple('Namespace', ['prefix', 'uri'])
"""A namedtuple-based type to represent XPath namespaces."""
|
|
|
|
|
|
###
|
|
# Utility functions for ElementTree's Element instances
|
|
def is_etree_element(obj):
    """
    Duck-typing test for ElementTree's Element-like objects.

    :param obj: the object to be tested.
    :return: `True` if the object exposes the attributes 'tag', 'attrib' and 'text', `False` otherwise.
    """
    return all(hasattr(obj, attr) for attr in ('tag', 'attrib', 'text'))
|
|
|
|
|
|
def elem_iter_strings(elem):
    """
    Generates the text chunks contained in an element, visiting the nodes produced by `elem.iter()`.

    For each visited node its text is yielded first, then its tail. The tail of the
    starting element is skipped, `None` values are never yielded.

    :param elem: the element from which to start the iteration.
    """
    for node in elem.iter():
        content = node.text
        if content is not None:
            yield content

        if node is elem:
            # The tail of the starting element is outside of its content
            continue

        trailing = node.tail
        if trailing is not None:
            yield trailing
|
|
|
|
|
|
###
|
|
# XPath node test functions
|
|
#
|
|
# XPath has 7 kinds of nodes:
|
|
#
|
|
# element, attribute, text, namespace, processing-instruction, comment, document
|
|
#
|
|
# Element-like objects are used for representing elements and comments, ElementTree-like objects
|
|
# for documents. Named-tuples are used for representing attributes and namespaces.
|
|
###
|
|
def is_element_node(obj, tag=None):
    """
    Returns `True` if the first argument is an element node matching the tag, `False` otherwise.
    Raises a ValueError if the argument tag has to be used but it's in a wrong format.

    :param obj: the node to be tested.
    :param tag: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
    are '*', '*:*', '*:local-name' and '{namespace}*'. An empty string is compared literally \
    with the tag of the element, so it matches only elements that have an empty tag.
    """
    if not is_etree_element(obj) or callable(obj.tag):
        # Not an Element-like object, or a comment/processing instruction (callable tag)
        return False
    elif tag is None:
        return True
    elif not obj.tag:
        return obj.tag == tag
    elif tag == '*' or tag == '*:*':
        return obj.tag != ''
    elif tag.startswith('*'):
        # Wildcard on the namespace part ('*:local-name'): compare only the local names.
        # startswith() is used instead of tag[0] to avoid an IndexError on empty strings.
        try:
            _, name = tag.split(':')
        except (ValueError, IndexError):
            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
        else:
            return obj.tag.split('}')[1] == name if obj.tag[0] == '{' else obj.tag == name
    elif tag.endswith('*'):
        # Wildcard on the local part ('{namespace}*'): compare only the namespaces
        if tag[0] != '{' or '}' not in tag:
            raise ElementPathValueError("unexpected format %r for argument 'tag'" % tag)
        return obj.tag.split('}')[0][1:] == tag.split('}')[0][1:] if obj.tag[0] == '{' else False
    else:
        return obj.tag == tag
|
|
|
|
|
|
def is_attribute_node(obj, name=None):
    """
    Returns `True` if the first argument is an attribute node matching the name, `False` otherwise.
    Raises a ValueError if the argument name has to be used but it's in a wrong format.

    :param obj: the node to be tested.
    :param name: a fully qualified name, a local name or a wildcard. The accepted wildcard formats \
    are '*', '*:*', '*:local-name' and '{namespace}*'. An empty string is compared literally \
    with the name of the attribute.
    """
    if name is None or name == '*' or name == '*:*':
        return isinstance(obj, AttributeNode)
    elif not isinstance(obj, AttributeNode):
        return False
    elif name.startswith('*'):
        # Wildcard on the namespace part ('*:local-name'): compare only the local names.
        # startswith() is used instead of indexing to avoid an IndexError on empty strings.
        try:
            _, _name = name.split(':')
        except (ValueError, IndexError):
            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
        else:
            return obj[0].split('}')[1] == _name if obj[0].startswith('{') else obj[0] == _name
    elif name.endswith('*'):
        # Wildcard on the local part ('{namespace}*'): compare only the namespaces
        if name[0] != '{' or '}' not in name:
            raise ElementPathValueError("unexpected format %r for argument 'name'" % name)
        return obj[0].split('}')[0][1:] == name.split('}')[0][1:] if obj[0].startswith('{') else False
    else:
        return obj[0] == name
|
|
|
|
|
|
def is_schema_node(obj):
    """
    Duck-typing test for schema nodes.

    :param obj: the object to be tested.
    :return: `True` if the object exposes the attributes 'name', 'local_name' and 'type', \
    `False` otherwise.
    """
    return all(hasattr(obj, attr) for attr in ('name', 'local_name', 'type'))
|
|
|
|
|
|
def is_comment_node(obj):
    """
    Returns `True` if the argument is an Element-like object whose tag is a callable
    named 'Comment', `False` otherwise.

    :param obj: the node to be tested.
    """
    if not is_etree_element(obj):
        return False
    return callable(obj.tag) and obj.tag.__name__ == 'Comment'
|
|
|
|
|
|
def is_processing_instruction_node(obj):
    """
    Returns `True` if the argument is an Element-like object whose tag is a callable
    named 'ProcessingInstruction', `False` otherwise.

    :param obj: the node to be tested.
    """
    if not is_etree_element(obj):
        return False
    return callable(obj.tag) and obj.tag.__name__ == 'ProcessingInstruction'
|
|
|
|
|
|
def is_document_node(obj):
    """
    Duck-typing test for ElementTree-like objects, that are used for representing documents.

    :param obj: the object to be tested.
    :return: `True` if the object exposes the attributes 'getroot', 'iter', 'iterfind' \
    and 'parse', `False` otherwise.
    """
    return (
        hasattr(obj, 'getroot') and hasattr(obj, 'iter') and
        hasattr(obj, 'iterfind') and hasattr(obj, 'parse')
    )
|
|
|
|
|
|
def is_namespace_node(obj):
    """
    Returns `True` if the argument is an instance of the `NamespaceNode` named-tuple,
    `False` otherwise.

    :param obj: the node to be tested.
    """
    return isinstance(obj, NamespaceNode)
|
|
|
|
|
|
if PY3:
    def is_text_node(obj):
        """Returns `True` if the argument is a string, `False` otherwise."""
        return isinstance(obj, str)
else:
    def is_text_node(obj):
        """Returns `True` if the argument is a byte or unicode string, `False` otherwise."""
        return isinstance(obj, (str, unicode))
|
|
|
|
|
|
def is_xpath_node(obj):
    """
    Returns `True` if the argument can be used as an XPath node, `False` otherwise.

    Tuples are accepted directly, the other objects are checked in order as
    Element-like objects, documents, text nodes and schema nodes.

    :param obj: the object to be tested.
    """
    if isinstance(obj, tuple):
        return True
    node_tests = (is_etree_element, is_document_node, is_text_node, is_schema_node)
    return any(node_test(obj) for node_test in node_tests)
|
|
|
|
|
|
###
|
|
# Node accessors: in this implementation node accessors return None instead of empty sequence.
|
|
# Ref: https://www.w3.org/TR/xpath-datamodel-31/#dm-document-uri
|
|
def node_attributes(obj):
    """
    Accessor for the attributes of a node.

    :param obj: the node to be inspected.
    :return: the `attrib` mapping of the node if it's an element node, `None` otherwise.
    """
    return obj.attrib if is_element_node(obj) else None
|
|
|
|
|
|
def node_base_uri(obj):
    """
    Accessor for the base URI of a node.

    :param obj: the node to be inspected.
    :return: the value of the attribute keyed by `XML_BASE`, taken from the element itself \
    or from the root element for a document node. Returns `None` if the attribute is \
    missing or if the node is neither an element nor a document.
    """
    try:
        if is_element_node(obj):
            attributes = obj.attrib
        elif is_document_node(obj):
            attributes = obj.getroot().attrib
        else:
            return None
        return attributes[XML_BASE]
    except KeyError:
        return None
|
|
|
|
|
|
def node_document_uri(obj):
    """
    Accessor for the URI of a document node.

    :param obj: the node to be inspected.
    :return: the value of the attribute keyed by `XML_BASE` of the root element, if it has \
    both a scheme and a network location or a path that starts with a slash. Returns `None` \
    in the other cases, including when the argument is not a document node.
    """
    if not is_document_node(obj):
        return None

    try:
        uri = obj.getroot().attrib[XML_BASE]
        parts = urlparse(uri)
    except (KeyError, ValueError):
        # Missing attribute or a value that cannot be parsed as URI
        return None

    if (parts.scheme and parts.netloc) or parts.path.startswith('/'):
        return uri
    return None
|
|
|
|
|
|
def node_children(obj):
    """
    Accessor for the children of a node.

    :param obj: the node to be inspected.
    :return: a generator that yields the children of an element node, or only the root \
    element for a document node. Returns `None` for other kinds of nodes.
    """
    if is_element_node(obj):
        children = obj
    elif is_document_node(obj):
        # The root element is the only child considered for a document
        children = [obj.getroot()]
    else:
        return None
    return (child for child in children)
|
|
|
|
|
|
def node_is_id(obj):
    """
    Checks the value of a node with `ncname_validator`.

    :param obj: the node to be inspected.
    :return: the result of `ncname_validator` applied to the text of an element node or to \
    the value of an attribute node, `None` for other kinds of nodes.
    """
    if is_element_node(obj):
        value = obj.text
    elif is_attribute_node(obj):
        value = obj[1]
    else:
        return None
    return ncname_validator(value)
|
|
|
|
|
|
def node_is_idrefs(obj):
    """
    Checks each whitespace-separated token of the value of a node with `ncname_validator`.

    :param obj: the node to be inspected.
    :return: `True` if all the tokens are accepted by `ncname_validator`, `False` if at least \
    one token is rejected or if an element node has no text, `None` if the node is neither \
    an element nor an attribute.
    """
    if is_element_node(obj):
        text = obj.text
        if text is None:
            return False
        return all(ncname_validator(token) for token in text.split())

    if is_attribute_node(obj):
        return all(ncname_validator(token) for token in obj[1].split())

    return None
|
|
|
|
|
|
def node_nilled(obj):
    """
    Accessor for the nilled property of a node.

    :param obj: the node to be inspected.
    :return: `True` if the attribute keyed by `XSI_NIL` of an element node is 'true' or '1', \
    `False` for the other element nodes, `None` if the argument is not an element node.
    """
    if not is_element_node(obj):
        return None
    nil_value = obj.get(XSI_NIL)
    return nil_value in ('true', '1')
|
|
|
|
|
|
def node_kind(obj):
    """
    Accessor for the kind of a node.

    :param obj: the node to be inspected.
    :return: one of 'element', 'attribute', 'text', 'document', 'namespace', 'comment' \
    or 'processing-instruction', `None` if the argument doesn't match any node test.
    """
    # The order of the tests is significant: the first matching test wins
    kind_tests = (
        (is_element_node, 'element'),
        (is_attribute_node, 'attribute'),
        (is_text_node, 'text'),
        (is_document_node, 'document'),
        (is_namespace_node, 'namespace'),
        (is_comment_node, 'comment'),
        (is_processing_instruction_node, 'processing-instruction'),
    )
    for node_test, kind in kind_tests:
        if node_test(obj):
            return kind
    return None
|
|
|
|
|
|
def node_name(obj):
    """
    Accessor for the name of a node.

    :param obj: the node to be inspected.
    :return: the tag for an element node, the first item of the tuple for an attribute \
    or a namespace node, `None` for other kinds of nodes.
    """
    if is_element_node(obj):
        return obj.tag
    if is_attribute_node(obj) or is_namespace_node(obj):
        return obj[0]
    return None
|