949 lines
43 KiB
Python
949 lines
43 KiB
Python
#
|
|
# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies).
|
|
# All rights reserved.
|
|
# This file is distributed under the terms of the MIT License.
|
|
# See the file 'LICENSE' in the root directory of the present
|
|
# distribution, or http://opensource.org/licenses/MIT.
|
|
#
|
|
# @author Davide Brunato <brunato@sissa.it>
|
|
#
|
|
"""
|
|
This module contains converter classes and definitions.
|
|
"""
|
|
from __future__ import unicode_literals
|
|
from collections import namedtuple
|
|
from types import MethodType
|
|
import string
|
|
import warnings
|
|
|
|
from .compat import ordered_dict_class, unicode_type
|
|
from .exceptions import XMLSchemaValueError
|
|
from .namespaces import XSI_NAMESPACE
|
|
from .qnames import local_name
|
|
from .etree import etree_element, lxml_etree_element, etree_register_namespace, lxml_etree_register_namespace
|
|
from xmlschema.namespaces import NamespaceMapper
|
|
|
|
ElementData = namedtuple('ElementData', ('tag', 'text', 'content', 'attributes'))
"""
Namedtuple used to exchange Element data between decoders and converters.

* *tag*: a string containing the Element's tag.
* *text*: `None` or a string representing the Element's text.
* *content*: `None`, a list containing the Element's children, or a dictionary
  mapping element names to lists of element contents (used for unordered
  input data).
* *attributes*: `None` or a dictionary containing the Element's attributes.
"""
|
|
|
|
|
|
def raw_xml_encode(value):
    """
    Encodes a simple value to its XML text form.

    Booleans become ``'true'``/``'false'``, lists and tuples become a
    space-separated string of their items, any other value is converted
    with the module's unicode string type.
    """
    if isinstance(value, bool):
        return ('false', 'true')[value]
    if isinstance(value, (list, tuple)):
        return ' '.join(unicode_type(item) for item in value)
    return unicode_type(value)
|
|
|
|
|
|
class XMLSchemaConverter(NamespaceMapper):
|
|
"""
|
|
Generic XML Schema based converter class. A converter is used to compose
|
|
decoded XML data for an Element into a data structure and to build an Element
|
|
from encoded data structure. There are two methods for interfacing the
|
|
converter with the decoding/encoding process. The method *element_decode*
|
|
accepts ElementData instance, containing the element parts, and returns
|
|
a data structure. The method *element_encode* accepts a data structure
|
|
and returns an ElementData instance that can be used to build an Element.
|
|
|
|
:param namespaces: map from namespace prefixes to URI.
|
|
:param dict_class: dictionary class to use for decoded data. Default is `dict`.
|
|
:param list_class: list class to use for decoded data. Default is `list`.
|
|
:param etree_element_class: the class that has to be used to create new XML elements, \
|
|
if not provided uses the ElementTree's Element class.
|
|
:param text_key: is the key to apply to element's decoded text data.
|
|
:param attr_prefix: controls the mapping of XML attributes, to the same name or \
|
|
with a prefix. If `None` the converter ignores attributes.
|
|
:param cdata_prefix: is used for including and prefixing the character data parts \
|
|
of a mixed content, that are labeled with an integer instead of a string. \
|
|
Character data parts are ignored if this argument is `None`.
|
|
:param indent: number of spaces for XML indentation (default is 4).
|
|
:param strip_namespaces: if set to `True` removes namespace declarations from data and \
|
|
namespace information from names, during decoding or encoding. Defaults to `False`.
|
|
:param preserve_root: if set to `True` the root element is preserved, wrapped into a \
|
|
single-item dictionary. Applicable only to default converter and to :class:`ParkerConverter`.
|
|
:param force_dict: if set to `True` complex elements with simple content are decoded \
|
|
with a dictionary also if there are no decoded attributes. Applicable to default converter \
|
|
only. Defaults to `False`.
|
|
:param force_list: if set to `True` child elements are decoded within a list in any case. \
|
|
Applicable to default converter only. Defaults to `False`.
|
|
|
|
:ivar dict: dictionary class to use for decoded data.
|
|
:ivar list: list class to use for decoded data.
|
|
:ivar etree_element_class: Element class to use
|
|
:ivar text_key: key for decoded Element text
|
|
:ivar attr_prefix: prefix for attribute names
|
|
:ivar cdata_prefix: prefix for character data parts
|
|
:ivar indent: indentation to use for rebuilding XML trees
|
|
:ivar strip_namespaces: remove namespace information
|
|
:ivar preserve_root: preserve the root element on decoding
|
|
:ivar force_dict: force dictionary for complex elements with simple content
|
|
:ivar force_list: force list for child elements
|
|
"""
|
|
# Deprecation from release v1.0.14
|
|
def _unmap_attribute_qname(self, name):
|
|
warnings.warn("the _unmap_attribute_qname method is deprecated and will "
|
|
"be removed in 1.1 version. Use the unmap_qname() instead, "
|
|
"providing the attribute group of the XSD element for the "
|
|
"optional *name_table* argument.",
|
|
DeprecationWarning, stacklevel=2)
|
|
if name[0] == '{' or ':' not in name:
|
|
return name
|
|
else:
|
|
return self.unmap_qname(name)
|
|
|
|
@property
def lossless(self):
    """
    The negation of the *lossy* property, kept for backward compatibility.
    Accessing it emits a DeprecationWarning.
    """
    message = ("the lossless property will be removed in 1.1 version, "
               "use 'not self.lossy' instead")
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return not self.lossy
|
|
|
|
def __init__(self, namespaces=None, dict_class=None, list_class=None, etree_element_class=None,
             text_key='$', attr_prefix='@', cdata_prefix=None, indent=4, strip_namespaces=False,
             preserve_root=False, force_dict=False, force_list=False, **kwargs):
    """
    Stores the converter options and initializes the namespace mapper.

    Extra keyword arguments are accepted and ignored.

    :raises XMLSchemaValueError: if *etree_element_class* is provided and it is \
    neither the ElementTree's Element class nor the lxml one.
    """
    if etree_element_class is not None and etree_element_class not in (etree_element, lxml_etree_element):
        # The message has a %r placeholder: it has to be formatted with the rejected class.
        raise XMLSchemaValueError("%r: unsupported element." % (etree_element_class,))

    self.dict = dict_class or dict
    self.list = list_class or list
    self.etree_element_class = etree_element_class or etree_element
    self.text_key = text_key
    self.attr_prefix = attr_prefix
    self.cdata_prefix = cdata_prefix
    self.indent = indent
    self.strip_namespaces = strip_namespaces
    self.preserve_root = preserve_root
    self.force_dict = force_dict
    self.force_list = force_list

    # The namespace registration function has to match the selected Element class.
    if self.etree_element_class is etree_element:
        register_namespace = etree_register_namespace
    else:
        register_namespace = lxml_etree_register_namespace
    super(XMLSchemaConverter, self).__init__(namespaces, register_namespace)
|
|
|
|
def __setattr__(self, name, value):
    """
    Validates and applies side effects for special converter attributes.

    * *attr_prefix*, *text_key* and *cdata_prefix* cannot contain ASCII letters
      or underscores. Setting *attr_prefix* also derives the *ns_prefix* attribute.
    * *strip_namespaces* replaces (or restores) the instance's *map_qname* and
      *unmap_qname* callables.

    :raises XMLSchemaValueError: if a prefix/key value contains letters or underscores.
    """
    if name in ('attr_prefix', 'text_key', 'cdata_prefix'):
        if value is not None and any(c in string.ascii_letters or c == '_' for c in value):
            raise XMLSchemaValueError('%r cannot includes letters or underscores: %r' % (name, value))
        elif name == 'attr_prefix':
            self.ns_prefix = (value or '') + 'xmlns'
    elif name == 'strip_namespaces':
        if value:
            # Functions stored on the instance are not bound on attribute access,
            # so they receive only the call arguments. Wrapping them with
            # MethodType would pass the converter itself to local_name().
            self.map_qname = lambda qname: local_name(qname)
            self.unmap_qname = lambda qname, name_table=None: local_name(qname)
        elif getattr(self, 'strip_namespaces', False):
            # Rebuild instance methods only if necessary
            self.map_qname = MethodType(XMLSchemaConverter.map_qname, self)
            self.unmap_qname = MethodType(XMLSchemaConverter.unmap_qname, self)
    super(XMLSchemaConverter, self).__setattr__(name, value)
|
|
|
|
@property
def lossy(self):
    """
    `True` if the converter ignores some kind of XML data during decoding/encoding,
    that is when one of *cdata_prefix*, *text_key* or *attr_prefix* is empty or `None`.
    """
    return not (self.cdata_prefix and self.text_key and self.attr_prefix)
|
|
|
|
@property
def losslessly(self):
    """
    `True` if XML data is decoded without loss of quality, neither on data
    nor on the shape of the data model. Only losslessly converters can always
    be used to encode to XML data that is strictly conformant to the schema.
    This converter always reports `False`.
    """
    return False
|
|
|
|
def copy(self, **kwargs):
    """
    Creates a new converter of the same class, using the options of this
    instance as defaults.

    :param kwargs: options to override in the copy. Each option not provided, \
    including *etree_element_class*, is taken from this instance.
    :return: a new instance of the converter's class.
    """
    return type(self)(
        namespaces=kwargs.get('namespaces', self._namespaces),
        dict_class=kwargs.get('dict_class', self.dict),
        list_class=kwargs.get('list_class', self.list),
        # Default to the current Element class, otherwise a copy of a converter
        # built for another Element class silently falls back to the default one.
        etree_element_class=kwargs.get('etree_element_class', self.etree_element_class),
        text_key=kwargs.get('text_key', self.text_key),
        attr_prefix=kwargs.get('attr_prefix', self.attr_prefix),
        cdata_prefix=kwargs.get('cdata_prefix', self.cdata_prefix),
        indent=kwargs.get('indent', self.indent),
        strip_namespaces=kwargs.get('strip_namespaces', self.strip_namespaces),
        preserve_root=kwargs.get('preserve_root', self.preserve_root),
        force_dict=kwargs.get('force_dict', self.force_dict),
        force_list=kwargs.get('force_list', self.force_list),
    )
|
|
|
|
def map_attributes(self, attributes):
    """
    A generator that converts decoded attributes into (key, value) couples,
    with names mapped by :meth:`map_qname` and prefixed by *attr_prefix*.
    Yields nothing if *attr_prefix* is `None` or no attributes are provided.

    :param attributes: A sequence or an iterator of couples with the name of \
    the attribute and the decoded value. Default is `None` (for `simpleType` \
    elements, that don't have attributes).
    """
    prefix = self.attr_prefix
    if prefix is None or not attributes:
        return

    qname_mapper = self.map_qname
    if prefix:
        for attr_name, attr_value in attributes:
            yield '%s%s' % (prefix, qname_mapper(attr_name)), attr_value
    else:
        for attr_name, attr_value in attributes:
            yield qname_mapper(attr_name), attr_value
|
|
|
|
def map_content(self, content):
    """
    A generator function for converting decoded content to a data structure.
    Names in extended form (starting with '{') are mapped with :meth:`map_qname`,
    other string names are yielded unchanged. Names that are not subscriptable
    (the integer labels of character data parts) are yielded prefixed by
    *cdata_prefix*, or skipped if *cdata_prefix* is `None`.

    :param content: A sequence or an iterator of tuples with the name of the \
    element, the decoded value and the `XsdElement` instance associated.
    """
    if not content:
        return

    qname_mapper = self.map_qname
    for name, value, xsd_child in content:
        try:
            mapped_name = qname_mapper(name) if name[0] == '{' else name
            yield mapped_name, value, xsd_child
        except TypeError:
            # A TypeError here means that the name is not a string.
            if self.cdata_prefix is not None:
                yield '%s%s' % (self.cdata_prefix, name), value, xsd_child
|
|
|
|
def etree_element(self, tag, text=None, children=None, attrib=None, level=0):
    """
    Builds an ElementTree's Element using arguments and the element class and
    the indent spacing stored in the converter instance.

    :param tag: the Element tag string.
    :param text: the Element text.
    :param children: the list of Element children/subelements.
    :param attrib: a dictionary with Element attributes. Can be `None`.
    :param level: the level related to the encoding process (0 means the root).
    :return: an instance of the Element class set for the converter instance.
    """
    # Inside this body the name `etree_element` is the module-level import, not this method.
    if type(self.etree_element_class) is type(etree_element):
        if attrib is None:
            elem = self.etree_element_class(tag)
        else:
            elem = self.etree_element_class(tag, self.dict(attrib))
    else:
        # A prefix that is empty is passed as `None` in the nsmap.
        nsmap = {prefix or None: uri for prefix, uri in self._namespaces.items()}
        elem = self.etree_element_class(tag, nsmap=nsmap)
        if attrib:
            # `attrib` defaults to None, that cannot be used for updating a mapping.
            elem.attrib.update(attrib)

    if children:
        elem.extend(children)
        # Without a text the children are placed on a new indented line.
        elem.text = text or '\n' + ' ' * self.indent * (level + 1)
    else:
        elem.text = text
    elem.tail = '\n' + ' ' * self.indent * level

    return elem
|
|
|
|
def element_decode(self, data, xsd_element, level=0):
    """
    Converts a decoded element data to a data structure.

    :param data: ElementData instance decoded from an Element node.
    :param xsd_element: the `XsdElement` associated to decoded the data.
    :param level: the level related to the decoding process (0 means the root).
    :return: a data structure containing the decoded data.
    """
    xsd_type = xsd_element.type
    result_dict = self.dict()

    # On a global root element add the namespace declarations of the mapped
    # namespaces that are used by the schema (plus the XSI namespace).
    if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self:
        schema_namespaces = set(xsd_element.namespaces.values())
        result_dict.update(
            ('%s:%s' % (self.ns_prefix, prefix) if prefix else self.ns_prefix, uri)
            for prefix, uri in self.items()
            if uri in schema_namespaces or uri == XSI_NAMESPACE
        )

    if xsd_type.is_simple() or xsd_type.has_simple_content():
        if data.attributes or self.force_dict and not xsd_type.is_simple():
            result_dict.update(self.map_attributes(data.attributes))
            if data.text is not None and data.text != '':
                result_dict[self.text_key] = data.text
            return result_dict
        return data.text if data.text != '' else None

    if data.attributes:
        result_dict.update(self.map_attributes(data.attributes))

    has_single_group = xsd_type.content_type.is_single()
    list_types = list if self.list is list else (self.list, list)
    for name, value, xsd_child in self.map_content(data.content):
        try:
            current = result_dict[name]
        except KeyError:
            # First occurrence: a bare value only for single-occurrence children.
            if xsd_child is None or has_single_group and xsd_child.is_single():
                result_dict[name] = self.list([value]) if self.force_list else value
            else:
                result_dict[name] = self.list([value])
        else:
            if not isinstance(current, list_types) or not current:
                result_dict[name] = self.list([current, value])
            elif isinstance(current[0], list_types) or not isinstance(value, list_types):
                current.append(value)
            else:
                result_dict[name] = self.list([current, value])

    decoded = result_dict if result_dict else None
    if level == 0 and self.preserve_root:
        return self.dict([(self.map_qname(data.tag), decoded)])
    return decoded
|
|
|
|
def element_encode(self, obj, xsd_element, level=0):
    """
    Extracts XML decoded data from a data structure for encoding into an ElementTree.

    When encoding the root with *preserve_root* the root data is looked up into
    the object using, in order, the qualified name, the mapped name and the
    local name of the element. If no key matches the object is used as is.

    :param obj: the decoded object.
    :param xsd_element: the `XsdElement` associated to the decoded data structure.
    :param level: the level related to the encoding process (0 means the root).
    :return: an ElementData instance.
    """
    if level != 0:
        tag = xsd_element.name
    else:
        tag = xsd_element.qualified_name
        if self.preserve_root:
            # Unwrap the root data. A sentinel is used to distinguish a missing
            # key from a stored `None`. The lookup never replaces the object
            # with a default value when no key is found.
            missing = object()
            try:
                for key in (tag, self.map_qname(tag), xsd_element.local_name):
                    root_data = obj.get(key, missing)
                    if root_data is not missing:
                        obj = root_data
                        break
            except (KeyError, AttributeError, TypeError):
                pass  # not a mapping: encode the object as is

    if not isinstance(obj, (self.dict, dict)):
        if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
            return ElementData(tag, obj, None, {})
        else:
            return ElementData(tag, None, obj, {})

    text_key = self.text_key
    attr_prefix = self.attr_prefix
    ns_prefix = self.ns_prefix
    cdata_prefix = self.cdata_prefix

    text = None
    content = []
    attributes = {}
    for name, value in obj.items():
        if text_key and name == text_key:
            text = value
        elif (cdata_prefix and name.startswith(cdata_prefix)) or \
                name[0].isdigit() and cdata_prefix == '':
            # Character data part: it's labeled with its integer index
            index = int(name[len(cdata_prefix):])
            content.append((index, value))
        elif name == ns_prefix:
            self[''] = value
        elif name.startswith('%s:' % ns_prefix):
            if not self.strip_namespaces:
                self[name[len(ns_prefix) + 1:]] = value
        elif attr_prefix and name.startswith(attr_prefix):
            attr_name = name[len(attr_prefix):]
            ns_name = self.unmap_qname(attr_name, xsd_element.attributes)
            attributes[ns_name] = value
        elif not isinstance(value, (self.list, list)) or not value:
            content.append((self.unmap_qname(name), value))
        elif isinstance(value[0], (self.dict, dict, self.list, list)):
            ns_name = self.unmap_qname(name)
            content.extend((ns_name, item) for item in value)
        else:
            # A list of simple values: it can be the value of a single element
            # with a list type or a sequence of values for repeated elements.
            ns_name = self.unmap_qname(name)
            for xsd_child in xsd_element.type.content_type.iter_elements():
                matched_element = xsd_child.match(ns_name, resolve=True)
                if matched_element is not None:
                    if matched_element.type.is_list():
                        content.append((ns_name, value))
                    else:
                        content.extend((ns_name, item) for item in value)
                    break
            else:
                # No child element matches: with an empty attribute prefix
                # the name can be the one of an attribute.
                if attr_prefix == '' and ns_name not in attributes:
                    for key, xsd_attribute in xsd_element.attributes.items():
                        if xsd_attribute.is_matching(ns_name):
                            attributes[key] = value
                            break
                    else:
                        content.append((ns_name, value))
                else:
                    content.append((ns_name, value))

    return ElementData(tag, text, content, attributes)
|
|
|
|
|
|
class UnorderedConverter(XMLSchemaConverter):
|
|
"""
|
|
Same as :class:`XMLSchemaConverter` but :meth:`element_encode` returns
|
|
a dictionary for the content of the element, that can be used directly
|
|
for unordered encoding mode. In this mode the order of the elements in
|
|
the encoded output is based on the model visitor pattern rather than
|
|
the order in which the elements were added to the input dictionary.
|
|
As the order of the input dictionary is not preserved, character data
|
|
between sibling elements are interleaved between tags.
|
|
"""
|
|
def element_encode(self, obj, xsd_element, level=0):
    """
    Extracts XML decoded data from a data structure for encoding into an ElementTree.
    The content of the returned ElementData is a dictionary instead of a list.

    When encoding the root with *preserve_root* the root data is looked up into
    the object using, in order, the qualified name, the mapped name and the
    local name of the element. If no key matches the object is used as is.

    :param obj: the decoded object.
    :param xsd_element: the `XsdElement` associated to the decoded data structure.
    :param level: the level related to the encoding process (0 means the root).
    :return: an ElementData instance.
    """
    if level != 0:
        tag = xsd_element.name
    else:
        tag = xsd_element.qualified_name
        if self.preserve_root:
            # Unwrap the root data. A sentinel is used to distinguish a missing
            # key from a stored `None`. The lookup never replaces the object
            # with a default value when no key is found.
            missing = object()
            try:
                for key in (tag, self.map_qname(tag), xsd_element.local_name):
                    root_data = obj.get(key, missing)
                    if root_data is not missing:
                        obj = root_data
                        break
            except (KeyError, AttributeError, TypeError):
                pass  # not a mapping: encode the object as is

    if not isinstance(obj, (self.dict, dict)):
        if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
            return ElementData(tag, obj, None, {})
        else:
            return ElementData(tag, None, obj, {})

    text_key = self.text_key
    attr_prefix = self.attr_prefix
    ns_prefix = self.ns_prefix
    cdata_prefix = self.cdata_prefix

    text = None
    attributes = {}

    # The unordered encoding mode assumes that the values of this dict will
    # all be lists where each item is the content of a single element. When
    # building content_lu, content which is not a list or lists to be placed
    # into a single element (element has a list content type) must be wrapped
    # in a list to retain that structure. Character data are not wrapped into
    # lists because they are divided from the rest of the content into the
    # unordered mode generator function of the ModelVisitor class.
    content_lu = {}

    for name, value in obj.items():
        if text_key and name == text_key:
            text = value
        elif (cdata_prefix and name.startswith(cdata_prefix)) or \
                name[0].isdigit() and cdata_prefix == '':
            # Character data part: it's labeled with its integer index
            index = int(name[len(cdata_prefix):])
            content_lu[index] = value
        elif name == ns_prefix:
            self[''] = value
        elif name.startswith('%s:' % ns_prefix):
            # Same guard as XMLSchemaConverter.element_encode: prefixed
            # declarations are not registered when namespaces are stripped.
            if not self.strip_namespaces:
                self[name[len(ns_prefix) + 1:]] = value
        elif attr_prefix and name.startswith(attr_prefix):
            attr_name = name[len(attr_prefix):]
            ns_name = self.unmap_qname(attr_name, xsd_element.attributes)
            attributes[ns_name] = value
        elif not isinstance(value, (self.list, list)) or not value:
            content_lu[self.unmap_qname(name)] = [value]
        elif isinstance(value[0], (self.dict, dict, self.list, list)):
            content_lu[self.unmap_qname(name)] = value
        else:
            # `value` is a list but not a list of lists or list of dicts.
            ns_name = self.unmap_qname(name)
            for xsd_child in xsd_element.type.content_type.iter_elements():
                matched_element = xsd_child.match(ns_name, resolve=True)
                if matched_element is not None:
                    if matched_element.type.is_list():
                        content_lu[ns_name] = [value]
                    else:
                        content_lu[ns_name] = value
                    break
            else:
                # No child element matches: with an empty attribute prefix
                # the name can be the one of an attribute.
                if attr_prefix == '' and ns_name not in attributes:
                    for xsd_attribute in xsd_element.attributes.values():
                        if xsd_attribute.is_matching(ns_name):
                            attributes[ns_name] = value
                            break
                    else:
                        content_lu[ns_name] = [value]
                else:
                    content_lu[ns_name] = [value]

    return ElementData(tag, text, content_lu, attributes)
|
|
|
|
|
|
class ParkerConverter(XMLSchemaConverter):
|
|
"""
|
|
XML Schema based converter class for Parker convention.
|
|
|
|
ref: http://wiki.open311.org/JSON_and_XML_Conversion/#the-parker-convention
|
|
ref: https://developer.mozilla.org/en-US/docs/Archive/JXON#The_Parker_Convention
|
|
|
|
:param namespaces: Map from namespace prefixes to URI.
|
|
:param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
|
|
Python 3.6+ or `OrderedDict` for previous versions.
|
|
:param list_class: List class to use for decoded data. Default is `list`.
|
|
:param preserve_root: If `True` the root element will be preserved. For default \
|
|
the Parker convention remove the document root element, returning only the value.
|
|
"""
|
|
def __init__(self, namespaces=None, dict_class=None, list_class=None, preserve_root=False, **kwargs):
    """
    Initializes the converter forcing the options required by the Parker
    convention: no attributes, an empty text key and no character data parts.
    """
    kwargs['attr_prefix'] = None
    kwargs['text_key'] = ''
    kwargs['cdata_prefix'] = None
    kwargs['preserve_root'] = preserve_root
    super(ParkerConverter, self).__init__(
        namespaces, dict_class or ordered_dict_class, list_class, **kwargs
    )
|
|
|
|
def __setattr__(self, name, value):
    """
    Rejects values not allowed by the Parker convention: *text_key* must be
    an empty string, *attr_prefix* and *cdata_prefix* must be `None`.

    :raises XMLSchemaValueError: if a not allowed value is provided.
    """
    if name == 'text_key':
        is_wrong = value != ''
    elif name in ('attr_prefix', 'cdata_prefix'):
        is_wrong = value is not None
    else:
        is_wrong = False

    if is_wrong:
        raise XMLSchemaValueError('Wrong value %r for the attribute %r of a %r.' % (value, name, type(self)))
    # NOTE(review): super() starts after XMLSchemaConverter, so the __setattr__
    # of XMLSchemaConverter (ns_prefix and strip_namespaces handling) is skipped.
    super(XMLSchemaConverter, self).__setattr__(name, value)
|
|
|
|
@property
def lossy(self):
    """The Parker convention is always lossy."""
    return True
|
|
|
|
def element_decode(self, data, xsd_element, level=0):
    """
    Converts a decoded element data to a data structure, following the
    Parker convention (attributes are not included).

    :param data: ElementData instance decoded from an Element node.
    :param xsd_element: the `XsdElement` associated to decoded the data.
    :param level: the level related to the decoding process (not used here).
    :return: the decoded text for simple content elements, otherwise a \
    dictionary (or `None` if empty). With *preserve_root* the result is wrapped \
    into a single-item dictionary keyed by the mapped tag.
    """
    map_qname = self.map_qname
    keep_root = self.preserve_root

    if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
        if keep_root:
            return self.dict([(map_qname(data.tag), data.text)])
        return data.text if data.text != '' else None

    result_dict = self.dict()
    list_types = list if self.list is list else (self.list, list)
    for name, value, xsd_child in self.map_content(data.content):
        if keep_root:
            # Children are decoded wrapped by their own tag: unwrap them.
            try:
                if len(value) == 1:
                    value = value[name]
            except (TypeError, KeyError):
                pass

        try:
            result_dict[name].append(value)
        except KeyError:
            # First occurrence: a list value is wrapped to keep it as one item
            result_dict[name] = self.list([value]) if isinstance(value, list_types) else value
        except AttributeError:
            # The previous value is not a list: start a list with both values
            result_dict[name] = self.list([result_dict[name], value])

    # Unwrap the list values that have been wrapped only once
    for entry in result_dict.values():
        if isinstance(entry, (self.list, list)) and len(entry) == 1:
            entry.extend(entry.pop())

    if keep_root:
        return self.dict([(map_qname(data.tag), result_dict)])
    return result_dict if result_dict else None
|
|
|
|
def element_encode(self, obj, xsd_element, level=0):
    """
    Extracts XML decoded data from a data structure for encoding into an ElementTree.

    With *preserve_root* the element data is taken from the item of the object
    keyed by the mapped name of the element. If the key is missing an empty
    ElementData is returned.

    :param obj: the decoded object.
    :param xsd_element: the `XsdElement` associated to the decoded data structure.
    :param level: the level related to the encoding process (not used here).
    :return: an ElementData instance, always with empty attributes.
    """
    if not isinstance(obj, (self.dict, dict)):
        if obj == '':
            obj = None
        if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
            return ElementData(xsd_element.name, obj, None, {})
        else:
            return ElementData(xsd_element.name, None, obj, {})
    else:
        unmap_qname = self.unmap_qname
        if not obj:
            return ElementData(xsd_element.name, None, None, {})
        elif self.preserve_root:
            try:
                items = obj[self.map_qname(xsd_element.name)]
            except KeyError:
                return ElementData(xsd_element.name, None, None, {})
        else:
            items = obj

        try:
            content = []
            # Iterate the unwrapped data: iterating `obj` would encode the root
            # wrapper as a child of itself when *preserve_root* is set.
            for name, value in items.items():
                ns_name = unmap_qname(name)
                if not isinstance(value, (self.list, list)) or not value:
                    content.append((ns_name, value))
                elif any(isinstance(v, (self.list, list)) for v in value):
                    content.extend((ns_name, item) for item in value)
                else:
                    # A list of simple values: it can be the value of a single
                    # element with a list type or the values of repeated elements.
                    for xsd_child in xsd_element.type.content_type.iter_elements():
                        matched_element = xsd_child.match(ns_name, resolve=True)
                        if matched_element is not None:
                            if matched_element.type.is_list():
                                content.append((ns_name, value))
                            else:
                                content.extend((ns_name, item) for item in value)
                            break
                    else:
                        content.extend((ns_name, item) for item in value)

        except AttributeError:
            # Reached, for example, when the unwrapped data is not a dictionary:
            # in this case the data is returned as the element's text.
            return ElementData(xsd_element.name, items, None, {})
        else:
            return ElementData(xsd_element.name, None, content, {})
|
|
|
|
|
|
class BadgerFishConverter(XMLSchemaConverter):
|
|
"""
|
|
XML Schema based converter class for Badgerfish convention.
|
|
|
|
ref: http://www.sklar.com/badgerfish/
|
|
ref: http://badgerfish.ning.com/
|
|
|
|
:param namespaces: Map from namespace prefixes to URI.
|
|
:param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
|
|
Python 3.6+ or `OrderedDict` for previous versions.
|
|
:param list_class: List class to use for decoded data. Default is `list`.
|
|
"""
|
|
def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs):
    """
    Initializes the converter forcing the options required by the BadgerFish
    convention: '@' as attribute prefix and '$' for both text and character data.
    """
    kwargs['attr_prefix'] = '@'
    kwargs['text_key'] = '$'
    kwargs['cdata_prefix'] = '$'
    decoded_dict_class = dict_class or ordered_dict_class
    super(BadgerFishConverter, self).__init__(namespaces, decoded_dict_class, list_class, **kwargs)
|
|
|
|
def __setattr__(self, name, value):
    """
    Rejects values not allowed by the BadgerFish convention: *text_key* and
    *cdata_prefix* must be '$', *attr_prefix* must be '@'.

    :raises XMLSchemaValueError: if a not allowed value is provided.
    """
    required_values = {'text_key': '$', 'attr_prefix': '@', 'cdata_prefix': '$'}
    if name in required_values and value != required_values[name]:
        raise XMLSchemaValueError('Wrong value %r for the attribute %r of a %r.' % (value, name, type(self)))
    # NOTE(review): super() starts after XMLSchemaConverter, so the __setattr__
    # of XMLSchemaConverter (ns_prefix and strip_namespaces handling) is skipped.
    super(XMLSchemaConverter, self).__setattr__(name, value)
|
|
|
|
@property
def lossy(self):
    """The BadgerFish convention is never lossy."""
    return False
|
|
|
|
def element_decode(self, data, xsd_element, level=0):
    """
    Converts a decoded element data to a data structure, following the
    BadgerFish convention. The result is a dictionary that maps the mapped
    tag of the element to a dictionary with its attributes, text and children.

    :param data: ElementData instance decoded from an Element node.
    :param xsd_element: the `XsdElement` associated to decoded the data.
    :param level: the level related to the decoding process (not used here).
    :return: a dictionary containing the decoded data.
    """
    dict_class = self.dict

    tag = self.map_qname(data.tag)
    # The converter is tested for truth: presumably it's false when no
    # namespace is mapped -- TODO confirm against NamespaceMapper.
    has_local_root = not self and not self.strip_namespaces
    result_dict = dict_class([t for t in self.map_attributes(data.attributes)])
    if has_local_root:
        # Placeholder for namespace declarations, filled or removed at the end
        result_dict['@xmlns'] = dict_class()

    if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
        if data.text is not None and data.text != '':
            result_dict[self.text_key] = data.text
    else:
        has_single_group = xsd_element.type.content_type.is_single()
        list_types = list if self.list is list else (self.list, list)
        for name, value, xsd_child in self.map_content(data.content):
            try:
                # Pass the '@xmlns' entry of the child to self.transfer() and
                # drop the entry if it is empty afterwards. Presumably transfer()
                # moves the declarations into this converter -- TODO confirm.
                if '@xmlns' in value:
                    self.transfer(value['@xmlns'])
                    if not value['@xmlns']:
                        del value['@xmlns']
                elif '@xmlns' in value[name]:
                    self.transfer(value[name]['@xmlns'])
                    if not value[name]['@xmlns']:
                        del value[name]['@xmlns']
                # A child wrapped only by its own tag is unwrapped
                if len(value) == 1:
                    value = value[name]
            except (TypeError, KeyError):
                # The value doesn't support the lookups above: it's used as is
                pass

            if value is None:
                value = self.dict()

            try:
                result = result_dict[name]
            except KeyError:
                # First occurrence: a bare value only for single-occurrence children
                if xsd_child is None or has_single_group and xsd_child.is_single():
                    result_dict[name] = value
                else:
                    result_dict[name] = self.list([value])
            else:
                if not isinstance(result, list_types) or not result:
                    result_dict[name] = self.list([result, value])
                elif isinstance(result[0], list_types) or not isinstance(value, list_types):
                    result.append(value)
                else:
                    result_dict[name] = self.list([result, value])

    if has_local_root:
        # Namespace declarations are kept inside the element's dictionary
        if self:
            result_dict['@xmlns'].update(self)
        else:
            del result_dict['@xmlns']
        return dict_class([(tag, result_dict)])
    else:
        # Namespace declarations are put beside the element, at top level
        return dict_class([('@xmlns', dict_class(self)), (tag, result_dict)])
|
|
|
|
def element_encode(self, obj, xsd_element, level=0):
    """
    Extracts XML decoded data from a BadgerFish data structure for encoding
    into an ElementTree.

    :param obj: the decoded object.
    :param xsd_element: the `XsdElement` associated to the decoded data structure.
    :param level: the level related to the encoding process (0 means the root).
    :return: an ElementData instance.
    """
    map_qname = self.map_qname
    unmap_qname = self.unmap_qname
    if level == 0:
        tag = xsd_element.qualified_name
    else:
        tag = xsd_element.name

    if not self.strip_namespaces:
        try:
            self.update(obj['@xmlns'])
        except KeyError:
            pass

    # The element data can be wrapped by the mapped name of the element
    try:
        element_data = obj[map_qname(xsd_element.name)]
    except KeyError:
        element_data = obj

    text_key = self.text_key
    attr_prefix = self.attr_prefix
    cdata_prefix = self.cdata_prefix
    list_types = (self.list, list)

    text = None
    content = []
    attributes = {}
    for name, value in element_data.items():
        if name == '@xmlns':
            continue

        if text_key and name == text_key:
            text = element_data[text_key]
        elif (cdata_prefix and name.startswith(cdata_prefix)) or \
                name[0].isdigit() and cdata_prefix == '':
            # Character data part: it's labeled with its integer index
            content.append((int(name[len(cdata_prefix):]), value))
        elif attr_prefix and name.startswith(attr_prefix):
            ns_name = self.unmap_qname(name[len(attr_prefix):], xsd_element.attributes)
            attributes[ns_name] = value
        elif not isinstance(value, list_types) or not value:
            content.append((unmap_qname(name), value))
        elif isinstance(value[0], (self.dict, dict, self.list, list)):
            ns_name = unmap_qname(name)
            content.extend((ns_name, item) for item in value)
        else:
            # A list of simple values: it can be the value of a single element
            # with a list type or a sequence of values for repeated elements.
            ns_name = unmap_qname(name)
            for xsd_child in xsd_element.type.content_type.iter_elements():
                matched_element = xsd_child.match(ns_name, resolve=True)
                if matched_element is None:
                    continue
                if matched_element.type.is_list():
                    content.append((ns_name, value))
                else:
                    content.extend((ns_name, item) for item in value)
                break
            else:
                if attr_prefix == '' and ns_name not in attributes:
                    for xsd_attribute in xsd_element.attributes.values():
                        if xsd_attribute.is_matching(ns_name):
                            attributes[ns_name] = value
                            break
                    else:
                        content.append((ns_name, value))
                else:
                    content.append((ns_name, value))

    return ElementData(tag, text, content, attributes)
|
|
|
|
|
|
class AbderaConverter(XMLSchemaConverter):
|
|
"""
|
|
XML Schema based converter class for Abdera convention.
|
|
|
|
ref: http://wiki.open311.org/JSON_and_XML_Conversion/#the-abdera-convention
|
|
ref: https://cwiki.apache.org/confluence/display/ABDERA/JSON+Serialization
|
|
|
|
:param namespaces: Map from namespace prefixes to URI.
|
|
:param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
|
|
Python 3.6+ or `OrderedDict` for previous versions.
|
|
:param list_class: List class to use for decoded data. Default is `list`.
|
|
"""
|
|
def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs):
    """
    Initializes the converter forcing the options required by the Abdera
    convention: empty attribute prefix and text key, no character data parts.
    """
    kwargs['attr_prefix'] = ''
    kwargs['text_key'] = ''
    kwargs['cdata_prefix'] = None
    decoded_dict_class = dict_class or ordered_dict_class
    super(AbderaConverter, self).__init__(namespaces, decoded_dict_class, list_class, **kwargs)
|
|
|
|
def __setattr__(self, name, value):
    """
    Rejects values not allowed by the Abdera convention: *text_key* and
    *attr_prefix* must be empty strings, *cdata_prefix* must be `None`.

    :raises XMLSchemaValueError: if a not allowed value is provided.
    """
    if name in ('text_key', 'attr_prefix'):
        is_wrong = value != ''
    elif name == 'cdata_prefix':
        is_wrong = value is not None
    else:
        is_wrong = False

    if is_wrong:
        raise XMLSchemaValueError('Wrong value %r for the attribute %r of a %r.' % (value, name, type(self)))
    # NOTE(review): super() starts after XMLSchemaConverter, so the __setattr__
    # of XMLSchemaConverter (ns_prefix and strip_namespaces handling) is skipped.
    super(XMLSchemaConverter, self).__setattr__(name, value)
|
|
|
|
@property
def lossy(self):
    """The Abdera convention is always lossy."""
    return True
|
|
|
|
def element_decode(self, data, xsd_element, level=0):
    """
    Converts a decoded element data to a data structure, following the
    Abdera convention.

    :param data: ElementData instance decoded from an Element node.
    :param xsd_element: the `XsdElement` associated to decoded the data.
    :param level: the level related to the decoding process (not used here).
    :return: a dictionary with 'attributes' and 'children' items if the element \
    has attributes, otherwise the decoded children (an empty list if there are none).
    """
    if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
        has_text = data.text is not None and data.text != ''
        children = data.text if has_text else None
    else:
        children = self.dict()
        for name, value, xsd_child in self.map_content(data.content):
            if value is None:
                value = self.list()

            try:
                children[name].append(value)
            except KeyError:
                # First occurrence: a not empty list is wrapped to keep it as one item
                if isinstance(value, (self.list, list)) and value:
                    children[name] = self.list([value])
                else:
                    children[name] = value
            except AttributeError:
                # The previous value is not a list: start a list with both values
                children[name] = self.list([children[name], value])
        if not children:
            children = None

    if not data.attributes:
        return children if children is not None else self.list()

    if children != []:
        attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])
        wrapped_children = self.list([children]) if children is not None else self.list()
        return self.dict([('attributes', attributes), ('children', wrapped_children)])

    attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])
    return self.dict([('attributes', attributes)])
|
|
|
|
def element_encode(self, obj, xsd_element, level=0):
    """
    Builds an `ElementData` tuple from a decoded object.

    :param obj: the data to encode: a bare value, or a dictionary that either \
    holds the children directly or has 'attributes' and optional 'children' entries.
    :param xsd_element: the XSD element used for the tag, for unmapping the \
    attribute names and for matching list-valued children.
    :param level: 0 selects the qualified name of the XSD element as the tag, \
    any other value selects its name.
    :return: an `ElementData` instance.
    :raises XMLSchemaValueError: if the children are a sequence of more than \
    one item whose first item is not a dictionary.
    """
    if level == 0:
        tag = xsd_element.qualified_name
    else:
        tag = xsd_element.name

    dict_types = (self.dict, dict)
    if not isinstance(obj, dict_types):
        # A bare value is the element's text; an empty list means no text.
        return ElementData(tag, None if obj == [] else obj, None, {})

    unmap_qname = self.unmap_qname
    attributes = {}
    try:
        pairs = [(self.unmap_qname(k, xsd_element.attributes), v)
                 for k, v in obj['attributes'].items()]
        attributes.update(pairs)
    except KeyError:
        # On KeyError the dictionary itself is treated as the children mapping.
        children = obj
    else:
        children = obj.get('children', [])

    if isinstance(children, dict_types):
        children = [children]
    elif children and not isinstance(children[0], dict_types):
        if len(children) > 1:
            raise XMLSchemaValueError("Wrong format")
        return ElementData(tag, children[0], None, attributes)

    list_types = (self.list, list)
    content = []
    for child in children:
        for name, value in child.items():
            if not isinstance(value, list_types) or not value:
                content.append((unmap_qname(name), value))
                continue

            ns_name = unmap_qname(name)
            if isinstance(value[0], dict_types + list_types):
                # A list of containers: one child element for each item.
                content.extend((ns_name, item) for item in value)
                continue

            # A list of plain values: the matching XSD child decides whether
            # it is a single list-typed value or a run of repeated elements.
            for xsd_child in xsd_element.type.content_type.iter_elements():
                matched_element = xsd_child.match(ns_name, resolve=True)
                if matched_element is None:
                    continue
                if matched_element.type.is_list():
                    content.append((ns_name, value))
                else:
                    content.extend((ns_name, item) for item in value)
                break
            else:
                content.append((ns_name, value))

    return ElementData(tag, None, content, attributes)
|
|
|
|
|
|
class JsonMLConverter(XMLSchemaConverter):
    """
    XML Schema based converter class for JsonML (JSON Mark-up Language) convention.

    ref: http://www.jsonml.org/
    ref: https://www.ibm.com/developerworks/library/x-jsonml/

    :param namespaces: Map from namespace prefixes to URI.
    :param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
    Python 3.6+ or `OrderedDict` for previous versions.
    :param list_class: List class to use for decoded data. Default is `list`.
    """
    def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs):
        # The three prefix/key settings are fixed to '' for this converter:
        # values supplied by the caller are overridden.
        kwargs.update(attr_prefix='', text_key='', cdata_prefix='')
        super(JsonMLConverter, self).__init__(
            namespaces, dict_class or ordered_dict_class, list_class, **kwargs
        )

    def __setattr__(self, name, value):
        """
        Sets an attribute, accepting only `''` for the fixed settings.

        :raises XMLSchemaValueError: if 'text_key', 'attr_prefix' or \
        'cdata_prefix' receives a value different from `''`.
        """
        if name in ('text_key', 'attr_prefix', 'cdata_prefix') and value != '':
            raise XMLSchemaValueError('Wrong value %r for the attribute %r of a %r.' % (value, name, type(self)))
        # The lookup starts after XMLSchemaConverter in the MRO, so that
        # class's own __setattr__ (if any) is bypassed.
        super(XMLSchemaConverter, self).__setattr__(name, value)

    @property
    def lossy(self):
        """Always `False` for this converter."""
        return False

    @property
    def losslessly(self):
        """Always `True` for this converter."""
        return True

    def element_decode(self, data, xsd_element, level=0):
        """
        Builds the decoded list for an element from its element data.

        :param data: element data; the fields `tag`, `text`, `content` and \
        `attributes` are read.
        :param xsd_element: the XSD element; its type selects between text and \
        children handling.
        :param level: at level 0, for a global element, the namespace map of \
        the converter is added to the attributes as xmlns declarations, unless \
        `strip_namespaces` is set or the converter is falsy.
        :return: a list starting with the mapped tag, followed by the attributes \
        dictionary (only if not empty) and then by the text or the children.
        """
        result_list = self.list([self.map_qname(data.tag)])
        attributes = self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])

        if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
            if data.text is not None and data.text != '':
                result_list.append(data.text)
        else:
            # A child decoded to None is represented by a list holding only its name.
            result_list.extend([
                value if value is not None else self.list([name])
                for name, value, _ in self.map_content(data.content)
            ])

        if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self:
            attributes.update([('xmlns:%s' % k if k else 'xmlns', v) for k, v in self.items()])
        if attributes:
            result_list.insert(1, attributes)
        return result_list

    def element_encode(self, obj, xsd_element, level=0):
        """
        Builds an `ElementData` tuple from a JsonML list.

        Namespace declarations found in the attributes dictionary ('xmlns' and
        'xmlns:*' keys) are stored into the converter itself.

        :param obj: a not empty list: the tag, an optional attributes dictionary, \
        then the text or the children.
        :param xsd_element: the XSD element that the tag has to match.
        :param level: not used by this method.
        :return: an `ElementData` instance.
        :raises XMLSchemaValueError: if *obj* is not a non-empty list or if the \
        tag does not match the XSD element.
        """
        unmap_qname = self.unmap_qname
        attributes = {}
        if not isinstance(obj, (self.list, list)) or not obj:
            # obj is wrapped in a 1-tuple because it may itself be a tuple: a
            # bare tuple operand is unpacked by the % operator, which raises
            # TypeError or formats the wrong value.
            raise XMLSchemaValueError("Wrong data format, a not empty list required: %r." % (obj,))

        data_len = len(obj)
        if data_len == 1:
            if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')):
                raise XMLSchemaValueError("Unmatched tag")
            return ElementData(xsd_element.name, None, None, attributes)

        xmlns_prefix = 'xmlns:'
        try:
            for k, v in obj[1].items():
                if k == 'xmlns':
                    self[''] = v
                elif k.startswith(xmlns_prefix):
                    # Slice off the leading marker instead of splitting on it.
                    self[k[len(xmlns_prefix):]] = v
                else:
                    attributes[self.unmap_qname(k, xsd_element.attributes)] = v
        except AttributeError:
            # The second item is not a mapping: the content starts right after the tag.
            content_index = 1
        else:
            content_index = 2

        # The tag is checked after the namespace declarations are processed,
        # because they can change the mapping used by unmap_qname.
        if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')):
            raise XMLSchemaValueError("Unmatched tag")

        if data_len <= content_index:
            return ElementData(xsd_element.name, None, [], attributes)
        elif data_len == content_index + 1 and \
                (xsd_element.type.is_simple() or xsd_element.type.has_simple_content()):
            return ElementData(xsd_element.name, obj[content_index], [], attributes)
        else:
            # Items that are not lists are character data and are keyed by a
            # progressive integer instead of a name.
            cdata_num = iter(range(1, data_len))
            list_types = list if self.list is list else (self.list, list)
            content = [
                (unmap_qname(e[0]), e) if isinstance(e, list_types) else (next(cdata_num), e)
                for e in obj[content_index:]
            ]
            return ElementData(xsd_element.name, None, content, attributes)
|