# -*- coding: utf-8 -*- # # Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). # All rights reserved. # This file is distributed under the terms of the MIT License. # See the file 'LICENSE' in the root directory of the present # distribution, or http://opensource.org/licenses/MIT. # # @author Davide Brunato # from __future__ import unicode_literals import json from .compat import ordered_dict_class from .resources import fetch_schema_locations, XMLResource from .validators.schema import XMLSchema, XMLSchemaBase def get_context(source, schema=None, cls=None, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False): """ Helper method for obtaining XML document validation/decode context. Return an XMLResource instance and a schema instance. """ if cls is None: cls = XMLSchema try: schema, locations = fetch_schema_locations(source, locations, base_url=base_url) except ValueError: if schema is None: raise elif not isinstance(schema, XMLSchemaBase): schema = cls(schema, validation='strict', locations=locations, base_url=base_url, defuse=defuse, timeout=timeout) else: schema = cls(schema, validation='strict', locations=locations, defuse=defuse, timeout=timeout) if not isinstance(source, XMLResource): source = XMLResource(source, defuse=defuse, timeout=timeout, lazy=lazy) return source, schema def validate(xml_document, schema=None, cls=None, path=None, schema_path=None, use_defaults=True, namespaces=None, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False): """ Validates an XML document against a schema instance. This function builds an :class:`XMLSchema` object for validating the XML document. Raises an :exc:`XMLSchemaValidationError` if the XML document is not validated against the schema. :param xml_document: can be an :class:`XMLResource` instance, a file-like object a path \ to a file or an URI of a resource or an Element instance or an ElementTree instance or \ a string containing the XML data. If the passed argument is not an :class:`XMLResource` \ instance a new one is built using this and *defuse*, *timeout* and *lazy* arguments. :param schema: can be a schema instance or a file-like object or a file path or a URL \ of a resource or a string containing the schema. :param cls: class to use for building the schema instance (for default :class:`XMLSchema` is used). :param path: is an optional XPath expression that matches the elements of the XML \ data that have to be decoded. If not provided the XML root element is used. :param schema_path: an XPath expression to select the XSD element to use for decoding. \ If not provided the *path* argument or the *source* root tag are used. :param use_defaults: defines when to use element and attribute defaults for filling \ missing required values. :param namespaces: is an optional mapping from namespace prefix to URI. :param locations: additional schema location hints, in case a schema instance has to be built. :param base_url: is an optional custom base URL for remapping relative locations, for \ default uses the directory where the XSD or alternatively the XML document is located. :param defuse: optional argument to pass for construct schema and :class:`XMLResource` instances. :param timeout: optional argument to pass for construct schema and :class:`XMLResource` instances. :param lazy: optional argument for construct the :class:`XMLResource` instance. """ source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy) schema.validate(source, path, schema_path, use_defaults, namespaces) def to_dict(xml_document, schema=None, cls=None, path=None, process_namespaces=True, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False, **kwargs): """ Decodes an XML document to a Python's nested dictionary. The decoding is based on an XML Schema class instance. For default the document is validated during the decoding phase. Raises an :exc:`XMLSchemaValidationError` if the XML document is not validated against the schema. :param xml_document: can be an :class:`XMLResource` instance, a file-like object a path \ to a file or an URI of a resource or an Element instance or an ElementTree instance or \ a string containing the XML data. If the passed argument is not an :class:`XMLResource` \ instance a new one is built using this and *defuse*, *timeout* and *lazy* arguments. :param schema: can be a schema instance or a file-like object or a file path or a URL \ of a resource or a string containing the schema. :param cls: class to use for building the schema instance (for default uses :class:`XMLSchema`). :param path: is an optional XPath expression that matches the elements of the XML \ data that have to be decoded. If not provided the XML root element is used. :param process_namespaces: indicates whether to use namespace information in the decoding process. :param locations: additional schema location hints, in case a schema instance has to be built. :param base_url: is an optional custom base URL for remapping relative locations, for \ default uses the directory where the XSD or alternatively the XML document is located. :param defuse: optional argument to pass for construct schema and :class:`XMLResource` instances. :param timeout: optional argument to pass for construct schema and :class:`XMLResource` instances. :param lazy: optional argument for construct the :class:`XMLResource` instance. :param kwargs: other optional arguments of :meth:`XMLSchema.iter_decode` as keyword arguments. :return: an object containing the decoded data. If ``validation='lax'`` keyword argument \ is provided the validation errors are collected and returned coupled in a tuple with the \ decoded data. :raises: :exc:`XMLSchemaValidationError` if the object is not decodable by \ the XSD component, or also if it's invalid when ``validation='strict'`` is provided. """ source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy) return schema.to_dict(source, path=path, process_namespaces=process_namespaces, **kwargs) def to_json(xml_document, fp=None, schema=None, cls=None, path=None, converter=None, process_namespaces=True, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False, json_options=None, **kwargs): """ Serialize an XML document to JSON. For default the XML data is validated during the decoding phase. Raises an :exc:`XMLSchemaValidationError` if the XML document is not validated against the schema. :param xml_document: can be an :class:`XMLResource` instance, a file-like object a path \ to a file or an URI of a resource or an Element instance or an ElementTree instance or \ a string containing the XML data. If the passed argument is not an :class:`XMLResource` \ instance a new one is built using this and *defuse*, *timeout* and *lazy* arguments. :param fp: can be a :meth:`write()` supporting file-like object. :param schema: can be a schema instance or a file-like object or a file path or an URL \ of a resource or a string containing the schema. :param cls: schema class to use for building the instance (for default uses :class:`XMLSchema`). :param path: is an optional XPath expression that matches the elements of the XML \ data that have to be decoded. If not provided the XML root element is used. :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the decoding. :param process_namespaces: indicates whether to use namespace information in the decoding process. :param locations: additional schema location hints, in case a schema instance has to be built. :param base_url: is an optional custom base URL for remapping relative locations, for \ default uses the directory where the XSD or alternatively the XML document is located. :param defuse: optional argument to pass for construct schema and :class:`XMLResource` instances. :param timeout: optional argument to pass for construct schema and :class:`XMLResource` instances. :param lazy: optional argument for construct the :class:`XMLResource` instance. :param json_options: a dictionary with options for the JSON serializer. :param kwargs: optional arguments of :meth:`XMLSchema.iter_decode` as keyword arguments \ to variate the decoding process. :return: a string containing the JSON data if *fp* is `None`, otherwise doesn't return anything. \ If ``validation='lax'`` keyword argument is provided the validation errors are collected and \ returned, eventually coupled in a tuple with the JSON data. :raises: :exc:`XMLSchemaValidationError` if the object is not decodable by \ the XSD component, or also if it's invalid when ``validation='strict'`` is provided. """ source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy) if json_options is None: json_options = {} decimal_type = kwargs.pop('decimal_type', float) dict_class = kwargs.pop('dict_class', ordered_dict_class) obj = schema.to_dict(source, path=path, decimal_type=decimal_type, dict_class=dict_class, process_namespaces=process_namespaces, converter=converter, **kwargs) if isinstance(obj, tuple): if fp is not None: json.dump(obj[0], fp, **kwargs) return tuple(obj[1]) else: return json.dumps(obj[0], **json_options), tuple(obj[1]) elif fp is not None: json.dump(obj, fp, **json_options) else: return json.dumps(obj, **json_options) def from_json(source, schema, path=None, converter=None, json_options=None, **kwargs): """ Deserialize JSON data to an XML Element. :param source: can be a string or a :meth:`read()` supporting file-like object \ containing the JSON document. :param schema: an :class:`XMLSchema` instance. :param path: is an optional XPath expression for selecting the element of the schema \ that matches the data that has to be encoded. For default the first global element of \ the schema is used. :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the encoding. :param json_options: a dictionary with options for the JSON deserializer. :param kwargs: Keyword arguments containing options for converter and encoding. :return: An element tree's Element instance. If ``validation='lax'`` keyword argument is \ provided the validation errors are collected and returned coupled in a tuple with the \ Element instance. :raises: :exc:`XMLSchemaValidationError` if the object is not encodable by the schema, \ or also if it's invalid when ``validation='strict'`` is provided. """ if not isinstance(schema, XMLSchemaBase): raise TypeError("An XMLSchema instance required for 'schema' argument: %r" % schema) elif json_options is None: json_options = {} dict_class = kwargs.pop('dict_class', ordered_dict_class) object_hook = json_options.pop('object_hook', ordered_dict_class) object_pairs_hook = json_options.pop('object_pairs_hook', ordered_dict_class) if hasattr(source, 'read'): obj = json.load(source, object_hook=object_hook, object_pairs_hook=object_pairs_hook, **json_options) else: obj = json.loads(source, object_hook=object_hook, object_pairs_hook=object_pairs_hook, **json_options) return schema.encode(obj, path=path, converter=converter, dict_class=dict_class, **kwargs)