debian-xmlschema/xmlschema/tests/test_factory/validation_tests.py

352 lines
16 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
import unittest
import pdb
import os
import sys
import pickle
import warnings
import xmlschema
from xmlschema import XMLSchemaValidationError, ParkerConverter, \
BadgerFishConverter, AbderaConverter, JsonMLConverter
from xmlschema.compat import unicode_type, ordered_dict_class
from xmlschema.etree import etree_tostring, ElementTree, \
etree_elements_assert_equal, lxml_etree, lxml_etree_element
from xmlschema.qnames import XSI_TYPE
from xmlschema.resources import fetch_namespaces
from xmlschema.tests import XsdValidatorTestCase
from . import tests_factory
def iter_nested_items(items, dict_class=dict, list_class=list):
    """
    Iterates, depth-first, over the leaf values of a nested structure
    built with dictionaries and lists.

    :param items: the object to explore. An object that is neither a dictionary \
    nor a list is yielded as is.
    :param dict_class: the dictionary class whose instances are explored by values.
    :param list_class: the list class whose instances are explored by items.
    :return: a generator that yields the leaf values.
    :raises TypeError: during the iteration, if a plain `dict` or `list` instance \
    is found where an instance of `dict_class` or `list_class` was expected.
    """
    if isinstance(items, dict_class):
        # Only the values are explored, the keys are never yielded.
        for v in items.values():
            for value in iter_nested_items(v, dict_class, list_class):
                yield value
    elif isinstance(items, list_class):
        for item in items:
            for value in iter_nested_items(item, dict_class, list_class):
                yield value
    elif isinstance(items, dict):
        # Reached only when dict_class is a more specific class than dict.
        raise TypeError("%r: is a dict() instead of %r." % (items, dict_class))
    elif isinstance(items, list):
        # Reached only when list_class is a more specific class than list.
        raise TypeError("%r: is a list() instead of %r." % (items, list_class))
    else:
        yield items
def make_validator_test_class(test_file, test_args, test_num, schema_class, check_with_lxml):
    """
    Creates a validator test class.

    :param test_file: the XML test file path.
    :param test_args: the arguments of the test case. The attributes `errors`, \
    `warnings`, `inspect`, `locations`, `defuse`, `skip` and `debug` are read.
    :param test_num: a positive integer number associated with the test case.
    :param schema_class: the schema class to use.
    :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \
    Works only for XSD 1.0 tests.
    :return: a test case class, named `TestValidatorNNN` where NNN is the \
    zero-padded `test_num`.
    """
    xml_file = os.path.relpath(test_file)

    # The path is embedded in a template that is later completed with the '%'
    # operator, so any '%' character of the path has to be escaped.
    msg_tmpl = "\n\n{}: %s.".format(xml_file.replace('%', '%%'))

    # Extract schema test arguments
    expected_errors = test_args.errors
    expected_warnings = test_args.warnings
    inspect = test_args.inspect
    locations = test_args.locations
    defuse = test_args.defuse
    skip_strict = test_args.skip
    debug_mode = test_args.debug

    class TestValidator(XsdValidatorTestCase):
        """Validation, decoding and encoding checks for a single XML test file."""

        @classmethod
        def setUpClass(cls):
            # Build the schema instance using 'lax' validation mode, in order
            # to accept also the schemas that have non-crashing errors.
            cls.schema_class = schema_class
            source, _locations = xmlschema.fetch_schema_locations(xml_file, locations)
            cls.schema = schema_class(source, validation='lax', locations=_locations, defuse=defuse)
            if check_with_lxml and lxml_etree is not None:
                cls.lxml_schema = lxml_etree.parse(source)

            # Results of the reference decoding, shared by the other checks.
            cls.errors = []
            cls.chunks = []
            cls.longMessage = True

            if debug_mode:
                print("\n##\n## Testing %r validation in debug mode.\n##" % xml_file)
                pdb.set_trace()

        def check_etree_encode(self, root, converter=None, **kwargs):
            """
            Decodes an element tree and checks that the re-encoded tree is
            equivalent to the original one.

            :param root: the root element of the XML tree to check.
            :param converter: the converter class to use for decoding and encoding.
            :param kwargs: keyword arguments passed to the decode and encode calls.
            :raises AssertionError: if the re-encoding fails or produces a different tree.
            """
            namespaces = kwargs.get('namespaces', {})
            data1 = self.schema.decode(root, converter=converter, **kwargs)
            if isinstance(data1, tuple):
                data1 = data1[0]  # When validation='lax'

            # Exhaust the generator only for its type checks: a TypeError is
            # raised if a plain dict is nested in the decoded data.
            for _ in iter_nested_items(data1, dict_class=ordered_dict_class):
                pass

            try:
                elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs)
            except XMLSchemaValidationError as err:
                raise AssertionError(str(err) + msg_tmpl % "error during re-encoding")

            if isinstance(elem1, tuple):
                # When validation='lax'
                if converter is not ParkerConverter:
                    for e in elem1[1]:
                        self.check_namespace_prefixes(unicode_type(e))
                elem1 = elem1[0]

            # Check that the encoded element does not contain reserved namespace prefixes
            if namespaces and all('ns%d' % k not in namespaces for k in range(10)):
                self.check_namespace_prefixes(etree_tostring(elem1, namespaces=namespaces))

            # Main check: compare the original and the re-encoded tree
            try:
                etree_elements_assert_equal(root, elem1, strict=False)
            except AssertionError as err:
                # If the check fails retry only if the converter is lossy (eg. ParkerConverter)
                # or if the XML case has defaults taken from the schema or some part of data
                # decoding is skipped by schema wildcards (set the specific argument in testfiles).
                if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict:
                    if debug_mode:
                        pdb.set_trace()
                    raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original")
                elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
                    return  # can't check encode equivalence if xsi:type is provided
                else:
                    # Lossy or augmenting cases are checked after another decoding/encoding pass
                    data2 = self.schema.decode(elem1, converter=converter, **kwargs)
                    if isinstance(data2, tuple):
                        data2 = data2[0]

                    if sys.version_info >= (3, 6):
                        # For Python < 3.6 cannot ensure attribute decoding order
                        try:
                            self.assertEqual(data1, data2, msg_tmpl % "re-decoded data changed")
                        except AssertionError:
                            if debug_mode:
                                pdb.set_trace()
                            raise

                    elem2 = self.schema.encode(data2, path=root.tag, converter=converter, **kwargs)
                    if isinstance(elem2, tuple):
                        elem2 = elem2[0]

                    try:
                        etree_elements_assert_equal(elem1, elem2, strict=False)
                    except AssertionError as err:
                        if debug_mode:
                            pdb.set_trace()
                        raise AssertionError(str(err) + msg_tmpl % "encoded tree differs after second pass")

        def check_json_serialization(self, root, converter=None, **kwargs):
            """
            Serializes an element tree to JSON, rebuilds the tree from JSON and
            checks that a second serialization pass gives a stable result.

            :param root: the root element of the XML tree to check.
            :param converter: the converter class to use for the conversions.
            :param kwargs: keyword arguments passed to the JSON conversion calls.
            :raises AssertionError: if the second pass gives a different result.
            """
            data1 = xmlschema.to_json(root, schema=self.schema, converter=converter, **kwargs)
            if isinstance(data1, tuple):
                data1 = data1[0]

            elem1 = xmlschema.from_json(data1, schema=self.schema, path=root.tag, converter=converter, **kwargs)
            if isinstance(elem1, tuple):
                elem1 = elem1[0]

            data2 = xmlschema.to_json(elem1, schema=self.schema, converter=converter, **kwargs)
            if isinstance(data2, tuple):
                data2 = data2[0]

            if converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
                return  # can't check encode equivalence if xsi:type is provided
            elif sys.version_info >= (3, 6):
                self.assertEqual(data2, data1, msg_tmpl % "serialized data changed at second pass")
            else:
                # For older Python versions compare the trees instead of the JSON data.
                elem2 = xmlschema.from_json(data2, schema=self.schema, path=root.tag, converter=converter, **kwargs)
                if isinstance(elem2, tuple):
                    elem2 = elem2[0]
                try:
                    etree_elements_assert_equal(elem1, elem2, strict=False, skip_comments=True)
                except AssertionError as err:
                    # Report the difference together with the XML file reference.
                    raise AssertionError(str(err) + msg_tmpl % "deserialized tree differs at second pass")

        def check_encode_and_json_roundtrips(self, root, options):
            """
            Runs the encoding check and the JSON serialization check on an
            element tree, for the default converter and for each converter class.

            :param root: the root element of the XML tree to check.
            :param options: a dictionary of keyword arguments for the encoding check. \
            The 'dict_class' item is not passed to the JSON serialization check. \
            The dictionary is not modified.
            """
            self.check_etree_encode(root, cdata_prefix='#', **options)  # Default converter
            self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
            self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
            self.check_etree_encode(root, BadgerFishConverter, **options)
            self.check_etree_encode(root, AbderaConverter, **options)
            self.check_etree_encode(root, JsonMLConverter, **options)

            json_options = {k: v for k, v in options.items() if k != 'dict_class'}
            self.check_json_serialization(root, cdata_prefix='#', **json_options)
            self.check_json_serialization(root, ParkerConverter, validation='lax', **json_options)
            self.check_json_serialization(root, ParkerConverter, validation='skip', **json_options)
            self.check_json_serialization(root, BadgerFishConverter, **json_options)
            self.check_json_serialization(root, AbderaConverter, **json_options)
            self.check_json_serialization(root, JsonMLConverter, **json_options)

        def check_decoding_with_element_tree(self):
            """
            Decodes the XML file, storing errors and decoded objects in the
            class-level `errors` and `chunks` lists used by the other checks.

            :raises ValueError: if the decoding does not return exactly one dictionary.
            """
            # Clear the shared lists in place.
            del self.errors[:]
            del self.chunks[:]

            def do_decoding():
                for obj in self.schema.iter_decode(xml_file):
                    if isinstance(obj, (xmlschema.XMLSchemaDecodeError, xmlschema.XMLSchemaValidationError)):
                        self.errors.append(obj)
                    else:
                        self.chunks.append(obj)

            if expected_warnings == 0:
                do_decoding()
            else:
                with warnings.catch_warnings(record=True) as ctx:
                    warnings.simplefilter("always")
                    do_decoding()
                    self.assertEqual(len(ctx), expected_warnings, "Wrong number of include/import warnings")

            self.check_errors(xml_file, expected_errors)

            if not self.chunks:
                raise ValueError("No decoded object returned!!")
            elif len(self.chunks) > 1:
                raise ValueError("Too many ({}) decoded objects returned: {}".format(len(self.chunks), self.chunks))
            elif not isinstance(self.chunks[0], dict):
                raise ValueError("Decoded object is not a dictionary: {}".format(self.chunks))

        def check_schema_serialization(self):
            """
            Repeats the decoding with a pickled and unpickled copy of the schema
            and compares the results with the ones of the reference decoding.
            """
            # Repeat with serialized-deserialized schema (only for Python 3)
            serialized_schema = pickle.dumps(self.schema)
            deserialized_schema = pickle.loads(serialized_schema)
            errors = []
            chunks = []
            for obj in deserialized_schema.iter_decode(xml_file):
                if isinstance(obj, xmlschema.XMLSchemaValidationError):
                    errors.append(obj)
                else:
                    chunks.append(obj)

            self.assertEqual(len(errors), len(self.errors), msg_tmpl % "wrong number errors")
            self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data differ")

        def check_decode_api(self):
            """
            Compares the results of the decode API, in each validation mode,
            with the data of the reference decoding.
            """
            # Compare with the decode API and other validation modes
            strict_data = self.schema.decode(xml_file)
            lax_data = self.schema.decode(xml_file, validation='lax')
            skip_data = self.schema.decode(xml_file, validation='skip')
            self.assertEqual(strict_data, self.chunks[0], msg_tmpl % "decode() API has a different result")
            self.assertEqual(lax_data[0], self.chunks[0], msg_tmpl % "'lax' validation has a different result")
            self.assertEqual(skip_data, self.chunks[0], msg_tmpl % "'skip' validation has a different result")

        def check_encoding_with_element_tree(self):
            """Checks encoding and JSON serialization of the XML file parsed with ElementTree."""
            root = ElementTree.parse(xml_file).getroot()
            namespaces = fetch_namespaces(xml_file)
            options = {'namespaces': namespaces, 'dict_class': ordered_dict_class}
            self.check_encode_and_json_roundtrips(root, options)

        def check_decoding_and_encoding_with_lxml(self):
            """
            Decodes the XML file parsed with lxml, compares the results with the
            reference decoding and, if there are no errors, checks also encoding
            and JSON serialization of the lxml tree.
            """
            xml_tree = lxml_etree.parse(xml_file)
            namespaces = fetch_namespaces(xml_file)
            errors = []
            chunks = []
            for obj in self.schema.iter_decode(xml_tree, namespaces=namespaces):
                if isinstance(obj, xmlschema.XMLSchemaValidationError):
                    errors.append(obj)
                else:
                    chunks.append(obj)

            self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data change with lxml")
            self.assertEqual(len(errors), len(self.errors), msg_tmpl % "errors number change with lxml")

            if not errors:
                root = xml_tree.getroot()
                if namespaces.get(''):
                    # Add a not empty prefix for encoding to avoid the use of reserved prefix ns0
                    namespaces['tns0'] = namespaces['']

                options = {
                    'etree_element_class': lxml_etree_element,
                    'namespaces': namespaces,
                    'dict_class': ordered_dict_class,
                }
                self.check_encode_and_json_roundtrips(root, options)

        def check_validate_and_is_valid_api(self):
            """Checks that `is_valid()` and `validate()` agree with the expected errors."""
            if expected_errors:
                self.assertFalse(self.schema.is_valid(xml_file), msg_tmpl % "file with errors is valid")
                self.assertRaises(XMLSchemaValidationError, self.schema.validate, xml_file)
            else:
                self.assertTrue(self.schema.is_valid(xml_file), msg_tmpl % "file without errors is not valid")
                self.assertIsNone(self.schema.validate(xml_file),
                                  msg_tmpl % "file without errors not validated")

        def check_iter_errors(self):
            """Checks that `iter_errors()` yields the expected number of errors."""
            # The inner message is formatted first, then it completes the template.
            reason = "wrong number of errors (%d expected)" % expected_errors
            self.assertEqual(len(list(self.schema.iter_errors(xml_file))), expected_errors,
                             msg_tmpl % reason)

        def check_lxml_validation(self):
            """
            Validates the XML file with lxml's XMLSchema and compares the outcome
            with the reference decoding. The check is skipped, printing a notice,
            if lxml cannot parse the schema.
            """
            try:
                schema = lxml_etree.XMLSchema(self.lxml_schema.getroot())
            except lxml_etree.XMLSchemaParseError:
                print("\nSkip lxml.etree.XMLSchema validation test for {!r} ({})".
                      format(xml_file, TestValidator.__name__, ))
            else:
                xml_tree = lxml_etree.parse(xml_file)
                if self.errors:
                    self.assertFalse(schema.validate(xml_tree))
                else:
                    self.assertTrue(schema.validate(xml_tree))

        def test_xml_document_validation(self):
            # The reference decoding has to run first: it fills the lists of
            # errors and decoded objects that the following checks read.
            self.check_decoding_with_element_tree()

            if not inspect and sys.version_info >= (3,):
                self.check_schema_serialization()

            if not self.errors:
                self.check_encoding_with_element_tree()

            if lxml_etree is not None:
                self.check_decoding_and_encoding_with_lxml()

            self.check_iter_errors()
            self.check_validate_and_is_valid_api()
            if check_with_lxml and lxml_etree is not None:
                self.check_lxml_validation()

    TestValidator.__name__ = TestValidator.__qualname__ = 'TestValidator{0:03}'.format(test_num)
    return TestValidator
if __name__ == '__main__':
    from xmlschema.tests import print_test_header

    # Build the decoding/encoding test classes from the XML test files and
    # publish them in the module namespace before running unittest.
    globals().update(tests_factory(make_validator_test_class, 'xml'))

    print_test_header()
    unittest.main()