201 lines
12 KiB
Python
201 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies).
|
|
# All rights reserved.
|
|
# This file is distributed under the terms of the MIT License.
|
|
# See the file 'LICENSE' in the root directory of the present
|
|
# distribution, or http://opensource.org/licenses/MIT.
|
|
#
|
|
# @author Davide Brunato <brunato@sissa.it>
|
|
#
|
|
from __future__ import unicode_literals
|
|
import json
|
|
|
|
from .compat import ordered_dict_class
|
|
from .resources import fetch_schema_locations, XMLResource
|
|
from .validators.schema import XMLSchema, XMLSchemaBase
|
|
|
|
|
|
def get_context(source, schema=None, cls=None, locations=None, base_url=None,
                defuse='remote', timeout=300, lazy=False):
    """
    Build the couple of objects needed for validating or decoding an XML document.

    The schema location hints are looked up first, calling ``fetch_schema_locations``
    on *source*. If the lookup succeeds a new schema instance is built from its result
    and it replaces the *schema* argument. If the lookup raises a ``ValueError`` the
    *schema* argument is used instead: the error is propagated when no schema has been
    provided, an :class:`XMLSchemaBase` instance is kept as is and any other value is
    passed to *cls* for building the schema instance.

    :param source: an :class:`XMLResource` instance or a value to wrap into a new one.
    :param schema: a schema instance or a value usable by *cls* for building one.
    :param cls: the schema class used for building instances (:class:`XMLSchema` if `None`).
    :param locations: schema location hints, forwarded to the lookup and to *cls*.
    :param base_url: base URL forwarded to the lookup and, when the schema is built \
    from the *schema* argument, also to *cls*.
    :param defuse: forwarded to the schema and :class:`XMLResource` constructors.
    :param timeout: forwarded to the schema and :class:`XMLResource` constructors.
    :param lazy: forwarded to the :class:`XMLResource` constructor.
    :return: a couple composed by an :class:`XMLResource` instance and a schema instance.
    :raises: :exc:`ValueError` if the lookup fails and *schema* is `None`.
    """
    schema_class = XMLSchema if cls is None else cls

    try:
        # The unpacking is kept inside the try block, so a ValueError raised by
        # it is handled in the same way of a failed lookup.
        hinted_schema, hinted_locations = fetch_schema_locations(
            source, locations, base_url=base_url
        )
    except ValueError:
        if schema is None:
            raise
        if not isinstance(schema, XMLSchemaBase):
            schema = schema_class(
                schema, validation='strict', locations=locations, base_url=base_url,
                defuse=defuse, timeout=timeout
            )
    else:
        schema = schema_class(
            hinted_schema, validation='strict', locations=hinted_locations,
            defuse=defuse, timeout=timeout
        )

    if isinstance(source, XMLResource):
        return source, schema
    return XMLResource(source, defuse=defuse, timeout=timeout, lazy=lazy), schema
|
|
|
|
|
|
def validate(xml_document, schema=None, cls=None, path=None, schema_path=None, use_defaults=True,
             namespaces=None, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False):
    """
    Validate an XML document against a schema. The XML resource and the schema
    instance are obtained from :func:`get_context`, then the validation is delegated
    to the ``validate`` method of the schema. Nothing is returned; according to the
    library documentation an :exc:`XMLSchemaValidationError` is raised if the XML
    document is not valid.

    :param xml_document: an :class:`XMLResource` instance, a file-like object, a path \
    to a file, a URI of a resource, an Element instance, an ElementTree instance or \
    a string containing the XML data. If it is not an :class:`XMLResource` instance \
    a new one is built using it with the *defuse*, *timeout* and *lazy* arguments.
    :param schema: a schema instance, a file-like object, a file path, a URL of a \
    resource or a string containing the schema.
    :param cls: the class used for building the schema instance (:class:`XMLSchema` for default).
    :param path: an optional XPath expression that selects the elements of the XML \
    data to process. If not provided the XML root element is used.
    :param schema_path: an XPath expression that selects the XSD element to use. \
    If not provided the *path* argument or the *source* root tag are used.
    :param use_defaults: whether to use element and attribute defaults for filling \
    missing required values.
    :param namespaces: an optional mapping from namespace prefix to URI.
    :param locations: additional schema location hints, used if a schema instance has to be built.
    :param base_url: an optional custom base URL for remapping relative locations. For \
    default the directory of the XSD, or alternatively of the XML document, is used.
    :param defuse: passed to the constructors of the schema and of the :class:`XMLResource`.
    :param timeout: passed to the constructors of the schema and of the :class:`XMLResource`.
    :param lazy: passed to the constructor of the :class:`XMLResource`.
    """
    resource, validator = get_context(
        xml_document, schema=schema, cls=cls, locations=locations, base_url=base_url,
        defuse=defuse, timeout=timeout, lazy=lazy
    )
    validator.validate(resource, path, schema_path, use_defaults, namespaces)
|
|
|
|
|
|
def to_dict(xml_document, schema=None, cls=None, path=None, process_namespaces=True,
            locations=None, base_url=None, defuse='remote', timeout=300, lazy=False, **kwargs):
    """
    Decode an XML document to a nested Python dictionary. The XML resource and the
    schema instance are obtained from :func:`get_context`, then the decoding is
    delegated to the ``to_dict`` method of the schema. For default the document is
    validated while it is decoded.

    :param xml_document: an :class:`XMLResource` instance, a file-like object, a path \
    to a file, a URI of a resource, an Element instance, an ElementTree instance or \
    a string containing the XML data. If it is not an :class:`XMLResource` instance \
    a new one is built using it with the *defuse*, *timeout* and *lazy* arguments.
    :param schema: a schema instance, a file-like object, a file path, a URL of a \
    resource or a string containing the schema.
    :param cls: the class used for building the schema instance (:class:`XMLSchema` for default).
    :param path: an optional XPath expression that selects the elements of the XML \
    data to decode. If not provided the XML root element is used.
    :param process_namespaces: whether to use namespace information in the decoding process.
    :param locations: additional schema location hints, used if a schema instance has to be built.
    :param base_url: an optional custom base URL for remapping relative locations. For \
    default the directory of the XSD, or alternatively of the XML document, is used.
    :param defuse: passed to the constructors of the schema and of the :class:`XMLResource`.
    :param timeout: passed to the constructors of the schema and of the :class:`XMLResource`.
    :param lazy: passed to the constructor of the :class:`XMLResource`.
    :param kwargs: other options of :meth:`XMLSchema.iter_decode`, forwarded as keyword arguments.
    :return: an object containing the decoded data. If the ``validation='lax'`` keyword \
    argument is provided the validation errors are collected and returned in a tuple \
    together with the decoded data.
    :raises: :exc:`XMLSchemaValidationError` if the object is not decodable by the XSD \
    component, or also if it's invalid when ``validation='strict'`` is provided.
    """
    resource, decoder = get_context(
        xml_document, schema=schema, cls=cls, locations=locations, base_url=base_url,
        defuse=defuse, timeout=timeout, lazy=lazy
    )
    decoded = decoder.to_dict(
        resource, path=path, process_namespaces=process_namespaces, **kwargs
    )
    return decoded
|
|
|
|
|
|
def to_json(xml_document, fp=None, schema=None, cls=None, path=None, converter=None,
            process_namespaces=True, locations=None, base_url=None, defuse='remote',
            timeout=300, lazy=False, json_options=None, **kwargs):
    """
    Serialize an XML document to JSON. The XML resource and the schema instance are
    obtained from :func:`get_context`, the document is decoded with the ``to_dict``
    method of the schema and the result is serialized with the :mod:`json` module.
    For default the XML data is validated during the decoding phase.

    :param xml_document: an :class:`XMLResource` instance, a file-like object, a path \
    to a file, a URI of a resource, an Element instance, an ElementTree instance or \
    a string containing the XML data. If it is not an :class:`XMLResource` instance \
    a new one is built using it with the *defuse*, *timeout* and *lazy* arguments.
    :param fp: an optional file-like object that supports :meth:`write()`. If provided \
    the JSON data is written to it instead of being returned.
    :param schema: a schema instance, a file-like object, a file path, a URL of a \
    resource or a string containing the schema.
    :param cls: the class used for building the schema instance (:class:`XMLSchema` for default).
    :param path: an optional XPath expression that selects the elements of the XML \
    data to decode. If not provided the XML root element is used.
    :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the decoding.
    :param process_namespaces: whether to use namespace information in the decoding process.
    :param locations: additional schema location hints, used if a schema instance has to be built.
    :param base_url: an optional custom base URL for remapping relative locations. For \
    default the directory of the XSD, or alternatively of the XML document, is used.
    :param defuse: passed to the constructors of the schema and of the :class:`XMLResource`.
    :param timeout: passed to the constructors of the schema and of the :class:`XMLResource`.
    :param lazy: passed to the constructor of the :class:`XMLResource`.
    :param json_options: a dictionary with options for the JSON serializer. It is passed \
    as keyword arguments to :func:`json.dump` or :func:`json.dumps`.
    :param kwargs: options of :meth:`XMLSchema.iter_decode` as keyword arguments, for \
    varying the decoding process. If not provided *decimal_type* is set to `float` and \
    *dict_class* to the ordered dictionary class of the package.
    :return: a string containing the JSON data if *fp* is `None`, otherwise nothing. \
    If the ``validation='lax'`` keyword argument is provided the validation errors are \
    collected: a tuple of errors is returned if *fp* is provided, otherwise a couple \
    composed by the JSON string and the tuple of errors.
    :raises: :exc:`XMLSchemaValidationError` if the object is not decodable by the XSD \
    component, or also if it's invalid when ``validation='strict'`` is provided.
    """
    source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy)
    if json_options is None:
        json_options = {}

    # These two options are extracted from kwargs in order to apply JSON-oriented
    # defaults; the remaining kwargs are decoding options only.
    decimal_type = kwargs.pop('decimal_type', float)
    dict_class = kwargs.pop('dict_class', ordered_dict_class)
    obj = schema.to_dict(source, path=path, decimal_type=decimal_type, dict_class=dict_class,
                         process_namespaces=process_namespaces, converter=converter, **kwargs)

    if isinstance(obj, tuple):
        # A tuple means that decoded data and validation errors are coupled.
        if fp is not None:
            # The serializer has to receive json_options: kwargs contains decoding
            # options (e.g. validation='lax') that json.dump() does not accept.
            json.dump(obj[0], fp, **json_options)
            return tuple(obj[1])
        else:
            return json.dumps(obj[0], **json_options), tuple(obj[1])
    elif fp is not None:
        json.dump(obj, fp, **json_options)
    else:
        return json.dumps(obj, **json_options)
|
|
|
|
|
|
def from_json(source, schema, path=None, converter=None, json_options=None, **kwargs):
    """
    Deserialize JSON data to an XML Element. The JSON document is loaded with the
    :mod:`json` module and the resulting object is encoded with the ``encode`` method
    of the schema.

    :param source: a string or a file-like object that supports :meth:`read()`, \
    containing the JSON document.
    :param schema: an :class:`XMLSchema` instance.
    :param path: an optional XPath expression for selecting the element of the schema \
    that matches the data that has to be encoded. For default the first global element \
    of the schema is used.
    :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the encoding.
    :param json_options: a dictionary with options for the JSON deserializer. The \
    dictionary is not modified by this function.
    :param kwargs: keyword arguments containing options for converter and encoding. If \
    not provided *dict_class* is set to the ordered dictionary class of the package.
    :return: an element tree's Element instance. If the ``validation='lax'`` keyword \
    argument is provided the validation errors are collected and returned in a tuple \
    together with the Element instance.
    :raises: :exc:`TypeError` if *schema* is not a schema instance, \
    :exc:`XMLSchemaValidationError` if the object is not encodable by the schema, \
    or also if it's invalid when ``validation='strict'`` is provided.
    """
    if not isinstance(schema, XMLSchemaBase):
        # The argument is wrapped in a one-element tuple, otherwise a tuple passed
        # as schema would be taken as the sequence of the format arguments.
        raise TypeError("An XMLSchema instance required for 'schema' argument: %r" % (schema,))

    # Work on a private copy: the hooks are popped here below and the dictionary
    # provided by the caller must not be altered by this call.
    json_options = {} if json_options is None else dict(json_options)

    dict_class = kwargs.pop('dict_class', ordered_dict_class)

    # NOTE: for the json module object_pairs_hook takes priority on object_hook when
    # both are defined, so a custom object_hook has effect only if the caller also
    # provides an object_pairs_hook option set to None.
    object_hook = json_options.pop('object_hook', ordered_dict_class)
    object_pairs_hook = json_options.pop('object_pairs_hook', ordered_dict_class)
    if hasattr(source, 'read'):
        obj = json.load(source, object_hook=object_hook, object_pairs_hook=object_pairs_hook, **json_options)
    else:
        obj = json.loads(source, object_hook=object_hook, object_pairs_hook=object_pairs_hook, **json_options)

    return schema.encode(obj, path=path, converter=converter, dict_class=dict_class, **kwargs)
|