352 lines
16 KiB
Python
352 lines
16 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies).
|
|
# All rights reserved.
|
|
# This file is distributed under the terms of the MIT License.
|
|
# See the file 'LICENSE' in the root directory of the present
|
|
# distribution, or http://opensource.org/licenses/MIT.
|
|
#
|
|
# @author Davide Brunato <brunato@sissa.it>
|
|
#
|
|
import unittest
|
|
import pdb
|
|
import os
|
|
import sys
|
|
import pickle
|
|
import warnings
|
|
|
|
import xmlschema
|
|
from xmlschema import XMLSchemaValidationError, ParkerConverter, \
|
|
BadgerFishConverter, AbderaConverter, JsonMLConverter
|
|
|
|
from xmlschema.compat import unicode_type, ordered_dict_class
|
|
from xmlschema.etree import etree_tostring, ElementTree, \
|
|
etree_elements_assert_equal, lxml_etree, lxml_etree_element
|
|
from xmlschema.qnames import XSI_TYPE
|
|
from xmlschema.resources import fetch_namespaces
|
|
|
|
from xmlschema.tests import XsdValidatorTestCase
|
|
from . import tests_factory
|
|
|
|
|
|
def iter_nested_items(items, dict_class=dict, list_class=list):
|
|
if isinstance(items, dict_class):
|
|
for k, v in items.items():
|
|
for value in iter_nested_items(v, dict_class, list_class):
|
|
yield value
|
|
elif isinstance(items, list_class):
|
|
for item in items:
|
|
for value in iter_nested_items(item, dict_class, list_class):
|
|
yield value
|
|
elif isinstance(items, dict):
|
|
raise TypeError("%r: is a dict() instead of %r." % (items, dict_class))
|
|
elif isinstance(items, list):
|
|
raise TypeError("%r: is a list() instead of %r." % (items, list_class))
|
|
else:
|
|
yield items
|
|
|
|
|
|
def make_validator_test_class(test_file, test_args, test_num, schema_class, check_with_lxml):
|
|
"""
|
|
Creates a validator test class.
|
|
|
|
:param test_file: the XML test file path.
|
|
:param test_args: line arguments for test case.
|
|
:param test_num: a positive integer number associated with the test case.
|
|
:param schema_class: the schema class to use.
|
|
:param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \
|
|
Works only for XSD 1.0 tests.
|
|
"""
|
|
xml_file = os.path.relpath(test_file)
|
|
msg_tmpl = "\n\n{}: %s.".format(xml_file)
|
|
|
|
# Extract schema test arguments
|
|
expected_errors = test_args.errors
|
|
expected_warnings = test_args.warnings
|
|
inspect = test_args.inspect
|
|
locations = test_args.locations
|
|
defuse = test_args.defuse
|
|
skip_strict = test_args.skip
|
|
debug_mode = test_args.debug
|
|
|
|
class TestValidator(XsdValidatorTestCase):
|
|
|
|
@classmethod
|
|
def setUpClass(cls):
|
|
# Builds schema instance using 'lax' validation mode to accepts also schemas with not crashing errors.
|
|
cls.schema_class = schema_class
|
|
source, _locations = xmlschema.fetch_schema_locations(xml_file, locations)
|
|
cls.schema = schema_class(source, validation='lax', locations=_locations, defuse=defuse)
|
|
if check_with_lxml and lxml_etree is not None:
|
|
cls.lxml_schema = lxml_etree.parse(source)
|
|
|
|
cls.errors = []
|
|
cls.chunks = []
|
|
cls.longMessage = True
|
|
|
|
if debug_mode:
|
|
print("\n##\n## Testing %r validation in debug mode.\n##" % xml_file)
|
|
pdb.set_trace()
|
|
|
|
def check_etree_encode(self, root, converter=None, **kwargs):
|
|
namespaces = kwargs.get('namespaces', {})
|
|
data1 = self.schema.decode(root, converter=converter, **kwargs)
|
|
if isinstance(data1, tuple):
|
|
data1 = data1[0] # When validation='lax'
|
|
|
|
for _ in iter_nested_items(data1, dict_class=ordered_dict_class):
|
|
pass
|
|
|
|
try:
|
|
elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs)
|
|
except XMLSchemaValidationError as err:
|
|
raise AssertionError(str(err) + msg_tmpl % "error during re-encoding")
|
|
|
|
if isinstance(elem1, tuple):
|
|
# When validation='lax'
|
|
if converter is not ParkerConverter:
|
|
for e in elem1[1]:
|
|
self.check_namespace_prefixes(unicode_type(e))
|
|
elem1 = elem1[0]
|
|
|
|
# Checks the encoded element to not contains reserved namespace prefixes
|
|
if namespaces and all('ns%d' % k not in namespaces for k in range(10)):
|
|
self.check_namespace_prefixes(etree_tostring(elem1, namespaces=namespaces))
|
|
|
|
# Main check: compare original a re-encoded tree
|
|
try:
|
|
etree_elements_assert_equal(root, elem1, strict=False)
|
|
except AssertionError as err:
|
|
# If the check fails retry only if the converter is lossy (eg. ParkerConverter)
|
|
# or if the XML case has defaults taken from the schema or some part of data
|
|
# decoding is skipped by schema wildcards (set the specific argument in testfiles).
|
|
if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict:
|
|
if debug_mode:
|
|
pdb.set_trace()
|
|
raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original")
|
|
elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
|
|
return # can't check encode equivalence if xsi:type is provided
|
|
else:
|
|
# Lossy or augmenting cases are checked after another decoding/encoding pass
|
|
data2 = self.schema.decode(elem1, converter=converter, **kwargs)
|
|
if isinstance(data2, tuple):
|
|
data2 = data2[0]
|
|
|
|
if sys.version_info >= (3, 6):
|
|
# For Python < 3.6 cannot ensure attribute decoding order
|
|
try:
|
|
self.assertEqual(data1, data2, msg_tmpl % "re-decoded data changed")
|
|
except AssertionError:
|
|
if debug_mode:
|
|
pdb.set_trace()
|
|
raise
|
|
|
|
elem2 = self.schema.encode(data2, path=root.tag, converter=converter, **kwargs)
|
|
if isinstance(elem2, tuple):
|
|
elem2 = elem2[0]
|
|
|
|
try:
|
|
etree_elements_assert_equal(elem1, elem2, strict=False)
|
|
except AssertionError as err:
|
|
if debug_mode:
|
|
pdb.set_trace()
|
|
raise AssertionError(str(err) + msg_tmpl % "encoded tree differs after second pass")
|
|
|
|
def check_json_serialization(self, root, converter=None, **kwargs):
|
|
data1 = xmlschema.to_json(root, schema=self.schema, converter=converter, **kwargs)
|
|
if isinstance(data1, tuple):
|
|
data1 = data1[0]
|
|
|
|
elem1 = xmlschema.from_json(data1, schema=self.schema, path=root.tag, converter=converter, **kwargs)
|
|
if isinstance(elem1, tuple):
|
|
elem1 = elem1[0]
|
|
|
|
data2 = xmlschema.to_json(elem1, schema=self.schema, converter=converter, **kwargs)
|
|
if isinstance(data2, tuple):
|
|
data2 = data2[0]
|
|
|
|
if converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
|
|
return # can't check encode equivalence if xsi:type is provided
|
|
elif sys.version_info >= (3, 6):
|
|
self.assertEqual(data2, data1, msg_tmpl % "serialized data changed at second pass")
|
|
else:
|
|
elem2 = xmlschema.from_json(data2, schema=self.schema, path=root.tag, converter=converter, **kwargs)
|
|
if isinstance(elem2, tuple):
|
|
elem2 = elem2[0]
|
|
try:
|
|
self.assertIsNone(etree_elements_assert_equal(elem1, elem2, strict=False, skip_comments=True))
|
|
except AssertionError as err:
|
|
self.assertIsNone(err, None)
|
|
|
|
def check_decoding_with_element_tree(self):
|
|
del self.errors[:]
|
|
del self.chunks[:]
|
|
|
|
def do_decoding():
|
|
for obj in self.schema.iter_decode(xml_file):
|
|
if isinstance(obj, (xmlschema.XMLSchemaDecodeError, xmlschema.XMLSchemaValidationError)):
|
|
self.errors.append(obj)
|
|
else:
|
|
self.chunks.append(obj)
|
|
|
|
if expected_warnings == 0:
|
|
do_decoding()
|
|
else:
|
|
with warnings.catch_warnings(record=True) as ctx:
|
|
warnings.simplefilter("always")
|
|
do_decoding()
|
|
self.assertEqual(len(ctx), expected_warnings, "Wrong number of include/import warnings")
|
|
|
|
self.check_errors(xml_file, expected_errors)
|
|
|
|
if not self.chunks:
|
|
raise ValueError("No decoded object returned!!")
|
|
elif len(self.chunks) > 1:
|
|
raise ValueError("Too many ({}) decoded objects returned: {}".format(len(self.chunks), self.chunks))
|
|
elif not isinstance(self.chunks[0], dict):
|
|
raise ValueError("Decoded object is not a dictionary: {}".format(self.chunks))
|
|
else:
|
|
self.assertTrue(True, "Successfully test decoding for {}".format(xml_file))
|
|
|
|
def check_schema_serialization(self):
|
|
# Repeat with serialized-deserialized schema (only for Python 3)
|
|
serialized_schema = pickle.dumps(self.schema)
|
|
deserialized_schema = pickle.loads(serialized_schema)
|
|
errors = []
|
|
chunks = []
|
|
for obj in deserialized_schema.iter_decode(xml_file):
|
|
if isinstance(obj, xmlschema.XMLSchemaValidationError):
|
|
errors.append(obj)
|
|
else:
|
|
chunks.append(obj)
|
|
|
|
self.assertEqual(len(errors), len(self.errors), msg_tmpl % "wrong number errors")
|
|
self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data differ")
|
|
|
|
def check_decode_api(self):
|
|
# Compare with the decode API and other validation modes
|
|
strict_data = self.schema.decode(xml_file)
|
|
lax_data = self.schema.decode(xml_file, validation='lax')
|
|
skip_data = self.schema.decode(xml_file, validation='skip')
|
|
self.assertEqual(strict_data, self.chunks[0], msg_tmpl % "decode() API has a different result")
|
|
self.assertEqual(lax_data[0], self.chunks[0], msg_tmpl % "'lax' validation has a different result")
|
|
self.assertEqual(skip_data, self.chunks[0], msg_tmpl % "'skip' validation has a different result")
|
|
|
|
def check_encoding_with_element_tree(self):
|
|
root = ElementTree.parse(xml_file).getroot()
|
|
namespaces = fetch_namespaces(xml_file)
|
|
options = {'namespaces': namespaces, 'dict_class': ordered_dict_class}
|
|
|
|
self.check_etree_encode(root, cdata_prefix='#', **options) # Default converter
|
|
self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
|
|
self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
|
|
self.check_etree_encode(root, BadgerFishConverter, **options)
|
|
self.check_etree_encode(root, AbderaConverter, **options)
|
|
self.check_etree_encode(root, JsonMLConverter, **options)
|
|
|
|
options.pop('dict_class')
|
|
self.check_json_serialization(root, cdata_prefix='#', **options)
|
|
self.check_json_serialization(root, ParkerConverter, validation='lax', **options)
|
|
self.check_json_serialization(root, ParkerConverter, validation='skip', **options)
|
|
self.check_json_serialization(root, BadgerFishConverter, **options)
|
|
self.check_json_serialization(root, AbderaConverter, **options)
|
|
self.check_json_serialization(root, JsonMLConverter, **options)
|
|
|
|
def check_decoding_and_encoding_with_lxml(self):
|
|
xml_tree = lxml_etree.parse(xml_file)
|
|
namespaces = fetch_namespaces(xml_file)
|
|
|
|
errors = []
|
|
chunks = []
|
|
for obj in self.schema.iter_decode(xml_tree, namespaces=namespaces):
|
|
if isinstance(obj, xmlschema.XMLSchemaValidationError):
|
|
errors.append(obj)
|
|
else:
|
|
chunks.append(obj)
|
|
|
|
self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data change with lxml")
|
|
self.assertEqual(len(errors), len(self.errors), msg_tmpl % "errors number change with lxml")
|
|
|
|
if not errors:
|
|
root = xml_tree.getroot()
|
|
if namespaces.get(''):
|
|
# Add a not empty prefix for encoding to avoid the use of reserved prefix ns0
|
|
namespaces['tns0'] = namespaces['']
|
|
|
|
options = {
|
|
'etree_element_class': lxml_etree_element,
|
|
'namespaces': namespaces,
|
|
'dict_class': ordered_dict_class,
|
|
}
|
|
self.check_etree_encode(root, cdata_prefix='#', **options) # Default converter
|
|
self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
|
|
self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
|
|
self.check_etree_encode(root, BadgerFishConverter, **options)
|
|
self.check_etree_encode(root, AbderaConverter, **options)
|
|
self.check_etree_encode(root, JsonMLConverter, **options)
|
|
|
|
options.pop('dict_class')
|
|
self.check_json_serialization(root, cdata_prefix='#', **options)
|
|
self.check_json_serialization(root, ParkerConverter, validation='lax', **options)
|
|
self.check_json_serialization(root, ParkerConverter, validation='skip', **options)
|
|
self.check_json_serialization(root, BadgerFishConverter, **options)
|
|
self.check_json_serialization(root, AbderaConverter, **options)
|
|
self.check_json_serialization(root, JsonMLConverter, **options)
|
|
|
|
def check_validate_and_is_valid_api(self):
|
|
if expected_errors:
|
|
self.assertFalse(self.schema.is_valid(xml_file), msg_tmpl % "file with errors is valid")
|
|
self.assertRaises(XMLSchemaValidationError, self.schema.validate, xml_file)
|
|
else:
|
|
self.assertTrue(self.schema.is_valid(xml_file), msg_tmpl % "file without errors is not valid")
|
|
self.assertEqual(self.schema.validate(xml_file), None,
|
|
msg_tmpl % "file without errors not validated")
|
|
|
|
def check_iter_errors(self):
|
|
self.assertEqual(len(list(self.schema.iter_errors(xml_file))), expected_errors,
|
|
msg_tmpl % "wrong number of errors (%d expected)" % expected_errors)
|
|
|
|
def check_lxml_validation(self):
|
|
try:
|
|
schema = lxml_etree.XMLSchema(self.lxml_schema.getroot())
|
|
except lxml_etree.XMLSchemaParseError:
|
|
print("\nSkip lxml.etree.XMLSchema validation test for {!r} ({})".
|
|
format(xml_file, TestValidator.__name__, ))
|
|
else:
|
|
xml_tree = lxml_etree.parse(xml_file)
|
|
if self.errors:
|
|
self.assertFalse(schema.validate(xml_tree))
|
|
else:
|
|
self.assertTrue(schema.validate(xml_tree))
|
|
|
|
def test_xml_document_validation(self):
|
|
self.check_decoding_with_element_tree()
|
|
|
|
if not inspect and sys.version_info >= (3,):
|
|
self.check_schema_serialization()
|
|
|
|
if not self.errors:
|
|
self.check_encoding_with_element_tree()
|
|
|
|
if lxml_etree is not None:
|
|
self.check_decoding_and_encoding_with_lxml()
|
|
|
|
self.check_iter_errors()
|
|
self.check_validate_and_is_valid_api()
|
|
if check_with_lxml and lxml_etree is not None:
|
|
self.check_lxml_validation()
|
|
|
|
TestValidator.__name__ = TestValidator.__qualname__ = 'TestValidator{0:03}'.format(test_num)
|
|
return TestValidator
|
|
|
|
|
|
if __name__ == '__main__':
|
|
from xmlschema.tests import print_test_header
|
|
|
|
# Creates decoding/encoding tests classes from XML files
|
|
globals().update(tests_factory(make_validator_test_class, 'xml'))
|
|
|
|
print_test_header()
|
|
unittest.main()
|