debian-xmlschema/xmlschema/xpath.py

332 lines
12 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
"""
This module defines a mixin class for enabling XPath on schemas.
"""
from __future__ import unicode_literals
from abc import abstractmethod
from elementpath import XPath2Parser, XPathSchemaContext, AbstractSchemaProxy
from .compat import Sequence
from .qnames import XSD_SCHEMA
from .namespaces import XSD_NAMESPACE
from .exceptions import XMLSchemaValueError, XMLSchemaTypeError
class XMLSchemaContext(XPathSchemaContext):
"""
XPath dynamic context class for *xmlschema* library. Implements safe iteration
methods for schema elements that recognize circular references.
"""
def _iter_descendants(self):
def safe_iter_descendants(context):
elem = context.item
yield elem
if elem.text is not None:
context.item = elem.text
yield context.item
if len(elem):
context.size = len(elem)
for context.position, context.item in enumerate(elem):
if context.item.parent is None:
for item in safe_iter_descendants(context):
yield item
elif getattr(context.item, 'ref', None) is not None:
yield context.item
elif context.item not in local_items:
local_items.append(context.item)
for item in safe_iter_descendants(context):
yield item
local_items = []
return safe_iter_descendants(self)
def _iter_context(self):
def safe_iter_context(context):
elem = context.item
yield elem
if elem.text is not None:
context.item = elem.text
yield context.item
for item in elem.attrib.items():
context.item = item
yield item
if len(elem):
context.size = len(elem)
for context.position, context.item in enumerate(elem):
if context.item.parent is None:
for item in safe_iter_context(context):
yield item
elif getattr(context.item, 'ref', None) is not None:
yield context.item
elif context.item not in local_items:
local_items.append(context.item)
for item in safe_iter_context(context):
yield item
local_items = []
return safe_iter_context(self)
class XMLSchemaProxy(AbstractSchemaProxy):
"""XPath schema proxy for the *xmlschema* library."""
def __init__(self, schema=None, base_element=None):
if schema is None:
from xmlschema import XMLSchema
schema = XMLSchema.meta_schema
super(XMLSchemaProxy, self).__init__(schema, base_element)
if base_element is not None:
try:
if base_element.schema is not schema:
raise XMLSchemaValueError("%r is not an element of %r" % (base_element, schema))
except AttributeError:
raise XMLSchemaTypeError("%r is not an XsdElement" % base_element)
def bind_parser(self, parser):
if parser.schema is not self:
parser.schema = self
try:
parser.symbol_table = self._schema.xpath_tokens[parser.__class__]
except KeyError:
parser.symbol_table = parser.__class__.symbol_table.copy()
self._schema.xpath_tokens[parser.__class__] = parser.symbol_table
for xsd_type in self.iter_atomic_types():
parser.schema_constructor(xsd_type.name)
parser.tokenizer = parser.create_tokenizer(parser.symbol_table)
def get_context(self):
return XMLSchemaContext(root=self._schema, item=self._base_element)
def get_type(self, qname):
try:
return self._schema.maps.types[qname]
except KeyError:
return None
def get_attribute(self, qname):
try:
return self._schema.maps.attributes[qname]
except KeyError:
return None
def get_element(self, qname):
try:
return self._schema.maps.elements[qname]
except KeyError:
return None
def get_substitution_group(self, qname):
try:
return self._schema.maps.substitution_groups[qname]
except KeyError:
return None
def find(self, path, namespaces=None):
return self._schema.find(path, namespaces)
def is_instance(self, obj, type_qname):
xsd_type = self._schema.maps.types[type_qname]
try:
xsd_type.encode(obj)
except ValueError:
return False
else:
return True
def cast_as(self, obj, type_qname):
xsd_type = self._schema.maps.types[type_qname]
return xsd_type.decode(obj)
def iter_atomic_types(self):
for xsd_type in self._schema.maps.types.values():
if xsd_type.target_namespace != XSD_NAMESPACE and hasattr(xsd_type, 'primitive_type'):
yield xsd_type
def get_primitive_type(self, xsd_type):
if not xsd_type.is_simple():
return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE]
elif not hasattr(xsd_type, 'primitive_type'):
if xsd_type.base_type is None:
return xsd_type
return self.get_primitive_type(xsd_type.base_type)
elif xsd_type.primitive_type is not xsd_type:
return self.get_primitive_type(xsd_type.primitive_type)
else:
return xsd_type
class ElementPathMixin(Sequence):
"""
Mixin abstract class for enabling ElementTree and XPath API on XSD components.
:cvar text: The Element text. Its value is always `None`. For compatibility with the ElementTree API.
:cvar tail: The Element tail. Its value is always `None`. For compatibility with the ElementTree API.
"""
text = None
tail = None
attributes = {}
namespaces = {}
xpath_default_namespace = None
_xpath_parser = None # Internal XPath 2.0 parser, instantiated at first use.
def __getstate__(self):
state = self.__dict__.copy()
state.pop('_xpath_parser', None)
return state
@abstractmethod
def __iter__(self):
pass
def __getitem__(self, i):
try:
return [e for e in self][i]
except AttributeError:
raise IndexError('child index out of range')
def __reversed__(self):
return reversed([e for e in self])
def __len__(self):
return len([e for e in self])
@property
def tag(self):
"""Alias of the *name* attribute. For compatibility with the ElementTree API."""
return getattr(self, 'name')
@property
def attrib(self):
"""Returns the Element attributes. For compatibility with the ElementTree API."""
return self.attributes
def get(self, key, default=None):
"""Gets an Element attribute. For compatibility with the ElementTree API."""
return self.attributes.get(key, default)
@property
def xpath_proxy(self):
"""Returns an XPath proxy instance bound with the schema."""
raise NotImplementedError
def _rebind_xpath_parser(self):
"""Rebind XPath 2 parser with schema component."""
if self._xpath_parser is not None:
self._xpath_parser.schema.bind_parser(self._xpath_parser)
def _get_xpath_namespaces(self, namespaces=None):
"""
Returns a dictionary with namespaces for XPath selection.
:param namespaces: an optional map from namespace prefix to namespace URI. \
If this argument is not provided the schema's namespaces are used.
"""
if namespaces is None:
namespaces = {k: v for k, v in self.namespaces.items() if k}
namespaces[''] = self.xpath_default_namespace
elif '' not in namespaces:
namespaces[''] = self.xpath_default_namespace
xpath_namespaces = XPath2Parser.DEFAULT_NAMESPACES.copy()
xpath_namespaces.update(namespaces)
return xpath_namespaces
def _xpath_parse(self, path, namespaces=None):
path = path.strip()
if path.startswith('/') and not path.startswith('//'):
path = ''.join(['/', XSD_SCHEMA, path])
namespaces = self._get_xpath_namespaces(namespaces)
if self._xpath_parser is None:
self._xpath_parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy)
else:
self._xpath_parser.namespaces = namespaces
return self._xpath_parser.parse(path)
def find(self, path, namespaces=None):
"""
Finds the first XSD subelement matching the path.
:param path: an XPath expression that considers the XSD component as the root element.
:param namespaces: an optional mapping from namespace prefix to namespace URI.
:return: The first matching XSD subelement or ``None`` if there is not match.
"""
context = XMLSchemaContext(self)
return next(self._xpath_parse(path, namespaces).select_results(context), None)
def findall(self, path, namespaces=None):
"""
Finds all XSD subelements matching the path.
:param path: an XPath expression that considers the XSD component as the root element.
:param namespaces: an optional mapping from namespace prefix to full name.
:return: a list containing all matching XSD subelements in document order, an empty \
list is returned if there is no match.
"""
context = XMLSchemaContext(self)
return self._xpath_parse(path, namespaces).get_results(context)
def iterfind(self, path, namespaces=None):
"""
Creates and iterator for all XSD subelements matching the path.
:param path: an XPath expression that considers the XSD component as the root element.
:param namespaces: is an optional mapping from namespace prefix to full name.
:return: an iterable yielding all matching XSD subelements in document order.
"""
context = XMLSchemaContext(self)
return self._xpath_parse(path, namespaces).select_results(context)
def iter(self, tag=None):
"""
Creates an iterator for the XSD element and its subelements. If tag is not `None` or '*',
only XSD elements whose matches tag are returned from the iterator. Local elements are
expanded without repetitions. Element references are not expanded because the global
elements are not descendants of other elements.
"""
def safe_iter(elem):
if tag is None or elem.is_matching(tag):
yield elem
for child in elem:
if child.parent is None:
for e in safe_iter(child):
yield e
elif getattr(child, 'ref', None) is not None:
if tag is None or elem.is_matching(tag):
yield child
elif child not in local_elements:
local_elements.append(child)
for e in safe_iter(child):
yield e
if tag == '*':
tag = None
local_elements = []
return safe_iter(self)
def iterchildren(self, tag=None):
"""
Creates an iterator for the child elements of the XSD component. If *tag* is not `None`
or '*', only XSD elements whose name matches tag are returned from the iterator.
"""
if tag == '*':
tag = None
for child in self:
if tag is None or child.is_matching(tag):
yield child