Improve content model encoding

- Update iter_collapsed_content() to perform a model-conforming
    reordering.
This commit is contained in:
Davide Brunato 2019-10-13 11:01:05 +02:00
parent de7e2343bd
commit 249e555659
9 changed files with 210 additions and 27 deletions

View File

@ -2,12 +2,13 @@
CHANGELOG
*********
`v1.0.15`_ (2019-10-11)
`v1.0.15`_ (2019-10-13)
=======================
* Improved XPath 2.0 bindings
* Added logging for schema initialization and building
* Added logging for schema initialization and building (handled with argument *loglevel*)
* Updated encoding of collapsed contents with a new model-based reordering method
* Removed XLink namespace from meta-schema (loaded from a fallback location like XHTML)
* Fixed half of failed W3C instance tests (remain 266 over 15344 tests)
* Fixed half of the failed W3C instance tests (255 of 15344 tests still fail)
`v1.0.14`_ (2019-08-27)
=======================

View File

@ -6,7 +6,7 @@ publiccodeYmlVersion: '0.2'
name: xmlschema
url: 'https://github.com/sissaschool/xmlschema'
landingURL: 'https://github.com/sissaschool/xmlschema'
releaseDate: '2019-10-11'
releaseDate: '2019-10-13'
softwareVersion: v1.0.15
developmentStatus: stable
platforms:

View File

@ -98,7 +98,11 @@ def make_validator_test_class(test_file, test_args, test_num, schema_class, chec
for _ in iter_nested_items(data1, dict_class=ordered_dict_class):
pass
elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs)
try:
elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs)
except XMLSchemaValidationError as err:
raise AssertionError(str(err) + msg_tmpl % "error during re-encoding")
if isinstance(elem1, tuple):
# When validation='lax'
if converter is not ParkerConverter:

View File

@ -580,6 +580,7 @@ class TestModelValidation11(TestModelValidation):
class TestModelBasedSorting(XsdValidatorTestCase):
def test_sort_content(self):
# test of ModelVisitor's sort_content/iter_unordered_content
schema = self.get_schema("""
<xs:element name="A" type="A_type" />
<xs:complexType name="A_type">
@ -641,6 +642,161 @@ class TestModelBasedSorting(XsdValidatorTestCase):
model.sort_content([('B3', True), ('B2', 10)]), [('B2', 10), ('B3', True)]
)
def test_iter_collapsed_content_with_optional_elements(self):
# Checks iter_collapsed_content() on a flat sequence that mixes optional
# (minOccurs="0": B1, B2, B6) and required (B3, B4, B5, B7) elements.
# Both inputs below are expected to be yielded unchanged.
schema = self.get_schema("""
<xs:element name="A" type="A_type" />
<xs:complexType name="A_type">
<xs:sequence>
<xs:element name="B1" minOccurs="0" />
<xs:element name="B2" minOccurs="0" />
<xs:element name="B3" />
<xs:element name="B4" />
<xs:element name="B5" />
<xs:element name="B6" minOccurs="0" />
<xs:element name="B7" />
</xs:sequence>
</xs:complexType>
""")
model = ModelVisitor(schema.types['A_type'].content_type)
# Case 1: content already in model order, with the leading optional
# elements B1 and B2 omitted.
content = [('B3', 10), ('B4', None), ('B5', True), ('B6', 'alpha'), ('B7', 20)]
# NOTE(review): restart() presumably resets the visitor to the start of
# the model so each case is independent -- confirm against ModelVisitor.
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)), content
)
# Case 2: the required element B4 is absent; the remaining items are
# still expected to come out in their original order.
content = [('B3', 10), ('B5', True), ('B6', 'alpha'), ('B7', 20)] # Missing B4
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)), content
)
def test_iter_collapsed_content_with_repeated_elements(self):
# Checks iter_collapsed_content() on a flat sequence where several
# elements may repeat (maxOccurs="unbounded": B2, B3, B5, B7).
# Every input below is expected to be yielded unchanged.
schema = self.get_schema("""
<xs:element name="A" type="A_type" />
<xs:complexType name="A_type">
<xs:sequence>
<xs:element name="B1" minOccurs="0" />
<xs:element name="B2" minOccurs="0" maxOccurs="unbounded" />
<xs:element name="B3" maxOccurs="unbounded" />
<xs:element name="B4" />
<xs:element name="B5" maxOccurs="unbounded" />
<xs:element name="B6" minOccurs="0" />
<xs:element name="B7" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
""")
model = ModelVisitor(schema.types['A_type'].content_type)
# Case 1: only B5 is repeated; runs on the freshly built visitor, so no
# restart() call precedes it.
content = [
('B3', 10), ('B4', None), ('B5', True), ('B5', False), ('B6', 'alpha'), ('B7', 20)
]
self.assertListEqual(
list(model.iter_collapsed_content(content)), content
)
# Case 2: B3, B5 and B7 all occur more than once, in model order.
content = [('B3', 10), ('B3', 11), ('B3', 12), ('B4', None), ('B5', True),
('B5', False), ('B6', 'alpha'), ('B7', 20), ('B7', 30)]
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)), content
)
# Case 3: content stops after B5, so optional B6 and required B7 are both
# absent; the items present are still expected in their original order.
content = [('B3', 10), ('B3', 11), ('B3', 12), ('B4', None), ('B5', True), ('B5', False)]
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)), content
)
def test_iter_collapsed_content_with_repeated_groups(self):
# Checks iter_collapsed_content() when the sequence group itself may occur
# up to twice (maxOccurs="2") and both of its children are optional.
# Input grouped by name (B1, B1, B2, B2) is expected to be interleaved into
# two group occurrences (B1, B2, B1, B2).
schema = self.get_schema("""
<xs:element name="A" type="A_type" />
<xs:complexType name="A_type">
<xs:sequence minOccurs="1" maxOccurs="2">
<xs:element name="B1" minOccurs="0" />
<xs:element name="B2" minOccurs="0" />
</xs:sequence>
</xs:complexType>
""")
model = ModelVisitor(schema.types['A_type'].content_type)
# Baseline: valid names only; expected output alternates B1/B2.
content = [('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4)]
self.assertListEqual(
list(model.iter_collapsed_content(content)),
[('B1', 1), ('B2', 3), ('B1', 2), ('B2', 4)]
)
# The remaining cases insert an element 'X' that the schema does not
# declare, at each possible position of the baseline content.
# Model broken by unknown element at start
content = [('X', None), ('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# 'X' in second position: expected output equals the input.
content = [('B1', 1), ('X', None), ('B1', 2), ('B2', 3), ('B2', 4)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# 'X' in third position: expected output equals the input.
content = [('B1', 1), ('B1', 2), ('X', None), ('B2', 3), ('B2', 4)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# 'X' in fourth position: the items before it are expected interleaved as
# B1, B2, B1, followed by 'X' and then the last B2.
content = [('B1', 1), ('B1', 2), ('B2', 3), ('X', None), ('B2', 4)]
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)),
[('B1', 1), ('B2', 3), ('B1', 2), ('X', None), ('B2', 4)]
)
# 'X' at the end: the valid items are expected fully interleaved, with 'X'
# kept last.
content = [('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4), ('X', None)]
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)),
[('B1', 1), ('B2', 3), ('B1', 2), ('B2', 4), ('X', None)]
)
def test_iter_collapsed_content_with_single_elements(self):
# Checks iter_collapsed_content() on a sequence of three required,
# non-repeatable elements (B1, B2, B3).
# Only the duplicated-B1 case expects a reordering; every other input is
# expected to be yielded unchanged.
schema = self.get_schema("""
<xs:element name="A" type="A_type" />
<xs:complexType name="A_type">
<xs:sequence>
<xs:element name="B1" />
<xs:element name="B2" />
<xs:element name="B3" />
</xs:sequence>
</xs:complexType>
""")
model = ModelVisitor(schema.types['A_type'].content_type)
# Content already in model order.
content = [('B1', 'abc'), ('B2', 10), ('B3', False)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# B3 placed first: the out-of-order input is expected back as given.
content = [('B3', False), ('B1', 'abc'), ('B2', 10)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# B3 and B2 swapped: likewise expected back as given.
content = [('B1', 'abc'), ('B3', False), ('B2', 10)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# B1 given twice although the model allows one occurrence: the second B1
# is expected to be moved after B3, to the end of the output.
content = [('B1', 'abc'), ('B1', 'def'), ('B2', 10), ('B3', False)]
model.restart()
self.assertListEqual(
list(model.iter_collapsed_content(content)),
[('B1', 'abc'), ('B2', 10), ('B3', False), ('B1', 'def')]
)
# Undeclared element 'X' in place of B3 at the end.
content = [('B1', 'abc'), ('B2', 10), ('X', None)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
# Undeclared element 'X' before otherwise valid content.
content = [('X', None), ('B1', 'abc'), ('B2', 10), ('B3', False)]
model.restart()
self.assertListEqual(list(model.iter_collapsed_content(content)), content)
if __name__ == '__main__':
from xmlschema.tests import print_test_header

View File

@ -374,8 +374,8 @@ class TestEncoding(XsdValidatorTestCase):
</xs:element>
""")
with self.assertRaises(XMLSchemaChildrenValidationError):
schema.to_etree({"A": [1, 2], "B": [3, 4]})
root = schema.to_etree(ordered_dict_class([('A', [1, 2]), ('B', [3, 4])]))
self.assertListEqual([e.text for e in root], ['1', '3', '2', '4'])
root = schema.to_etree({"A": [1, 2], "B": [3, 4]}, converter=UnorderedConverter)
self.assertListEqual([e.text for e in root], ['1', '3', '2', '4'])

View File

@ -225,7 +225,11 @@ class XMLSchemaValidationError(XMLSchemaValidatorError, ValueError):
if hasattr(self.validator, 'tostring'):
msg.append("Schema:\n\n%s\n" % self.validator.tostring(' ', 20))
if is_etree_element(self.elem):
elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20)
try:
elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20)
except (ValueError, TypeError):
elem_as_string = repr(self.elem)
if hasattr(self.elem, 'sourceline'):
msg.append("Instance (line %r):\n\n%s\n" % (self.elem.sourceline, elem_as_string))
else:

View File

@ -712,12 +712,12 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
cdata_index = 0
if isinstance(element_data.content, dict) or kwargs.get('unordered'):
content = model.iter_unordered_content(element_data.content)
elif not isinstance(element_data.content, list):
content = []
elif converter.losslessly:
content = element_data.content
elif isinstance(element_data.content, list):
content = model.iter_collapsed_content(element_data.content)
else:
content = []
content = ModelVisitor(self).iter_collapsed_content(element_data.content)
for index, (name, value) in enumerate(content):
if isinstance(name, int):

View File

@ -607,26 +607,42 @@ class ModelVisitor(MutableSequence):
"""
prev_name = None
unordered_content = defaultdict(deque)
for name, value in content:
if isinstance(name, int) or self.element is None:
yield name, value
elif prev_name != name:
continue
while self.element is not None:
if self.element.is_matching(name):
yield name, value
prev_name = name
for _ in self.advance(True):
pass
break
for key in unordered_content:
if self.element.is_matching(key):
break
else:
if prev_name == name:
unordered_content[name].append(value)
break
for _ in self.advance(False):
pass
continue
try:
yield key, unordered_content[key].popleft()
except IndexError:
del unordered_content[key]
else:
for _ in self.advance(True):
pass
else:
yield name, value
prev_name = name
elif self.element.is_matching(name):
yield name, value
else:
unordered_content[name].append(value)
while self.element is not None and unordered_content:
for key in unordered_content:
if self.element.is_matching(key):
try:
yield name, unordered_content[key].popleft()
except IndexError:
del unordered_content[key]
break
else:
break
# Add the remaining consumable content onto the end of the data.
for name, values in unordered_content.items():

View File

@ -178,7 +178,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
namespace has been overridden by an import. Ignored if the argument *global_maps* is provided.
:type use_meta: bool
:param loglevel: for setting a different logging level for schema initialization \
and building. For default is WARNING (30).
and building. The default is WARNING (30). For INFO level set it to 20, for \
DEBUG level set it to 10. The default loglevel is restored after schema building, \
when exiting the initialization method.
:type loglevel: int
:cvar XSD_VERSION: store the XSD version (1.0 or 1.1).