From 1ff234b69052c65a91ed8b1948fbc05f648c37b3 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 27 Aug 2019 14:15:56 +0200 Subject: [PATCH 01/36] Add publiccode.yml for Italian Public Services registry --- publiccode.yml | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 publiccode.yml diff --git a/publiccode.yml b/publiccode.yml new file mode 100644 index 0000000..1702b5f --- /dev/null +++ b/publiccode.yml @@ -0,0 +1,67 @@ +# This repository adheres to the publiccode.yml standard by including this +# metadata file that makes public software easily discoverable. +# More info at https://github.com/italia/publiccode.yml + +publiccodeYmlVersion: '0.2' +name: xmlschema +url: 'https://github.com/sissaschool/xmlschema' +landingURL: 'https://github.com/sissaschool/xmlschema' +releaseDate: '2019-08-27' +softwareVersion: v1.0.14 +developmentStatus: stable +platforms: + - linux + - windows + - mac +softwareType: library +inputTypes: + - XSD schemas + - XML data +categories: + - data-analytics + - data-collection +maintenance: + type: internal + contacts: + - name: Davide Brunato + email: davide.brunato@sissa.it + affiliation: ' Scuola Internazionale Superiore di Studi Avanzati' +legal: + license: MIT + mainCopyrightOwner: Scuola Internazionale Superiore di Studi Avanzati + repoOwner: Scuola Internazionale Superiore di Studi Avanzati +localisation: + localisationReady: false + availableLanguages: + - en +it: + riuso: + codiceIPA: sissa + piattaforme: + spid: false +description: + en: + genericName: xmlschema + apiDocumentation: 'https://xmlschema.readthedocs.io/en/latest/api.html' + documentation: 'http://xmlschema.readthedocs.io/en/latest/' + shortDescription: XML Schema validator and data conversion library for Python + longDescription: > + The _xmlschema_ library is an implementation of [XML + Schema](http://www.w3.org/2001/XMLSchema) for Python (supports Python 2.7 + and Python 3.5+). 
+ + + This library arises from the needs of a solid Python layer for processing + XML Schema based files for [MaX (Materials design at the + Exascale)](http://www.max-centre.eu/) European project. A significant + problem is the encoding and the decoding of the XML data files produced by + different simulation software. Another important requirement is the XML + data validation, in order to put the produced data under control. The lack + of a suitable alternative for Python in the schema-based decoding of XML + data has led to build this library. Obviously this library can be useful + for other cases related to XML Schema based processing, not only for the + original scope. + features: + - XSD 1.0 and XSD 1.1 validator and decoder +outputTypes: + - JSON From df939b9ceac07440ffad41a476a9bb9854039011 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 28 Aug 2019 09:33:49 +0200 Subject: [PATCH 02/36] Fix xs:alternative and xs:keyref reference building --- doc/conf.py | 2 +- publiccode.yml | 7 +++--- setup.py | 2 +- xmlschema/__init__.py | 2 +- xmlschema/validators/elements.py | 35 +++++++++++++++++++++++++++--- xmlschema/validators/identities.py | 4 ++-- 6 files changed, 40 insertions(+), 12 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 19559b7..c0eed6c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ author = 'Davide Brunato' # The short X.Y version. version = '1.0' # The full version, including alpha/beta/rc tags. -release = '1.0.14' +release = '1.0.15' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/publiccode.yml b/publiccode.yml index 1702b5f..bd8ed3f 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2019-08-27' -softwareVersion: v1.0.14 +releaseDate: '2019-xx-xx' +softwareVersion: v1.0.15 developmentStatus: stable platforms: - linux @@ -35,10 +35,9 @@ localisation: availableLanguages: - en it: + countryExtensionVersion: '0.2' riuso: codiceIPA: sissa - piattaforme: - spid: false description: en: genericName: xmlschema diff --git a/setup.py b/setup.py index 264e343..a4dc551 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ class InstallCommand(install): setup( name='xmlschema', - version='1.0.14', + version='1.0.15', install_requires=['elementpath~=1.2.0'], packages=['xmlschema'], include_package_data=True, diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index 8702015..80685c0 100644 --- a/xmlschema/__init__.py +++ b/xmlschema/__init__.py @@ -29,7 +29,7 @@ from .validators import ( XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11 ) -__version__ = '1.0.14' +__version__ = '1.0.15' __author__ = "Davide Brunato" __contact__ = "brunato@sissa.it" __copyright__ = "Copyright 2016-2019, SISSA" diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 88c1500..4da22f2 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -838,6 +838,26 @@ class Xsd11Element(XsdElement): return self.schema.target_namespace return self._target_namespace + def iter_components(self, xsd_classes=None): + if xsd_classes is None: + yield self + for obj in self.identities.values(): + yield obj + else: + if isinstance(self, xsd_classes): + yield self + for obj in self.identities.values(): + if isinstance(obj, xsd_classes): + yield obj + + for alt in self.alternatives: + for obj in alt.iter_components(xsd_classes): + 
yield obj + + if self.ref is None and self.type.parent is not None: + for obj in self.type.iter_components(xsd_classes): + yield obj + def get_type(self, elem): if not self.alternatives: return self.type @@ -849,10 +869,12 @@ class Xsd11Element(XsdElement): else: elem = etree_element(elem.tag) - for alt in self.alternatives: - if alt.type is not None and \ - alt.token.boolean_value(list(alt.token.select(context=XPathContext(root=elem)))): + for alt in filter(lambda x: x.type is not None, self.alternatives): + if alt.token is None: return alt.type + elif alt.token.boolean_value(list(alt.token.select(context=XPathContext(root=elem)))): + return alt.type + return self.type def is_overlap(self, other): @@ -986,3 +1008,10 @@ class XsdAlternative(XsdComponent): @property def validation_attempted(self): return 'full' if self.built else self.type.validation_attempted + + def iter_components(self, xsd_classes=None): + if xsd_classes is None or isinstance(self, xsd_classes): + yield self + if self.type is not None and self.type.parent is not None: + for obj in self.type.iter_components(xsd_classes): + yield obj diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 6f8ba97..790294a 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -208,7 +208,7 @@ class XsdIdentity(XsdComponent): @property def built(self): - return bool(self.fields and self.selector) + return self.selector is not None def __call__(self, *args, **kwargs): for error in self.validator(*args, **kwargs): @@ -296,7 +296,7 @@ class XsdKeyref(XsdIdentity): @property def built(self): - return bool(self.fields and self.selector and self.refer) + return self.selector is not None and self.refer is not None def get_refer_values(self, elem): values = set() From f5afa915fc525f2f69a81898f4f9807510b5d983 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 31 Aug 2019 07:22:44 +0200 Subject: [PATCH 03/36] Expand model tests with XMLSchema11 
class --- xmlschema/tests/test_models.py | 105 +++++++++++++++++++---------- xmlschema/validators/facets.py | 9 ++- xmlschema/validators/identities.py | 2 + xmlschema/validators/wildcards.py | 2 +- 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py index 5ec1566..60618e8 100644 --- a/xmlschema/tests/test_models.py +++ b/xmlschema/tests/test_models.py @@ -14,6 +14,7 @@ This module runs tests concerning model groups validation. """ import unittest +from xmlschema import XMLSchema10, XMLSchema11 from xmlschema.validators import ModelVisitor from xmlschema.compat import ordered_dict_class from xmlschema.tests import casepath, XsdValidatorTestCase @@ -150,9 +151,9 @@ class TestModelValidation(XsdValidatorTestCase): self.check_stop(model) # is optional self.assertIsNone(model.element) - # --- XSD 1.0 schema --- + # --- XSD 1.0/1.1 meta-schema models --- - def test_simple_derivation_model(self): + def test_meta_simple_derivation_model(self): """ @@ -162,7 +163,7 @@ class TestModelValidation(XsdValidatorTestCase): """ - group = self.schema_class.meta_schema.groups['simpleDerivation'] + group = XMLSchema10.meta_schema.groups['simpleDerivation'] model = ModelVisitor(group) self.check_advance_true(model) # match @@ -185,8 +186,9 @@ class TestModelValidation(XsdValidatorTestCase): self.check_advance_false(model, [(group, 0, group[:])]) # not match with self.assertIsNone(model.element) - def test_simple_restriction_model(self): + def test_meta_simple_restriction_model(self): """ + @@ -210,25 +212,38 @@ class TestModelValidation(XsdValidatorTestCase): + + + + + + + + + + + """ # Sequence with an optional single element and an optional unlimited choice. 
group = self.schema_class.meta_schema.groups['simpleRestrictionModel'] model = ModelVisitor(group) - self.assertEqual(model.element, group[0]) - self.check_advance_true(model) # match - self.assertEqual(model.element, group[1][0][0]) - self.check_advance_false(model) # do not match - self.assertEqual(model.element, group[1][0][1]) - self.check_advance_false(model) # do not match - self.assertEqual(model.element, group[1][0][2]) - self.check_advance_true(model) # match - self.assertEqual(model.element, group[1][0][0]) - for _ in range(12): - self.check_advance_false(model) # no match for all the inner choice group "xs:facets" - self.assertIsNone(model.element) - def test_schema_model(self): + if self.schema_class.XSD_VERSION == '1.0': + self.assertEqual(model.element, group[0]) + self.check_advance_true(model) # match + self.assertEqual(model.element, group[1][0][0]) + self.check_advance_false(model) # do not match + self.assertEqual(model.element, group[1][0][1]) + self.check_advance_false(model) # do not match + self.assertEqual(model.element, group[1][0][2]) + self.check_advance_true(model) # match + self.assertEqual(model.element, group[1][0][0]) + for _ in range(12): + self.check_advance_false(model) # no match for all the inner choice group "xs:facets" + self.assertIsNone(model.element) + + def test_meta_schema_top_model(self): """ @@ -288,7 +303,7 @@ class TestModelValidation(XsdValidatorTestCase): self.check_advance_true(model) # match self.assertIsNone(model.element) - def test_attr_declaration(self): + def test_meta_attr_declarations_group(self): """ @@ -322,7 +337,7 @@ class TestModelValidation(XsdValidatorTestCase): self.check_advance(model, match) self.assertEqual(model.element, group[1]) - def test_complex_type_model(self): + def test_meta_complex_type_model(self): """ @@ -343,6 +358,20 @@ class TestModelValidation(XsdValidatorTestCase): + + + + + + + + + + + + + + """ group = self.schema_class.meta_schema.groups['complexTypeModel'] @@ -357,27 +386,31 
@@ class TestModelValidation(XsdValidatorTestCase): self.check_advance_true(model) # match self.assertIsNone(model.element) - model.restart() - self.assertEqual(model.element, group[0]) - for match in [False, False, False, False, True]: - self.check_advance(model, match) # match - self.check_stop(model) - self.assertIsNone(model.element) + if self.schema_class.XSD_VERSION == '1.0': + model.restart() + self.assertEqual(model.element, group[0]) + for match in [False, False, False, False, True]: + self.check_advance(model, match) # match + self.check_stop(model) + self.assertIsNone(model.element) - model.restart() - self.assertEqual(model.element, group[0]) - for match in [False, False, False, False, True, False, True, False, False, False]: - self.check_advance(model, match) # match, match - self.assertIsNone(model.element) + model.restart() + self.assertEqual(model.element, group[0]) + for match in [False, False, False, False, True, False, True, False, False, False]: + self.check_advance(model, match) # match, match + self.assertIsNone(model.element) - def test_schema_document_model(self): + def test_meta_schema_document_model(self): group = self.schema_class.meta_schema.elements['schema'].type.content_type # A schema model with a wrong tag model = ModelVisitor(group) - self.assertEqual(model.element, group[0][0]) - self.check_advance_false(model) # eg. anyAttribute - self.check_stop(model) + if self.schema_class.XSD_VERSION == '1.0': + self.assertEqual(model.element, group[0][0]) + self.check_advance_false(model) # eg. 
anyAttribute + self.check_stop(model) + else: + self.assertEqual(model.element, group[0][0][0]) # # Tests on schema test_cases/features/models/models.xsd @@ -540,6 +573,10 @@ class TestModelValidation(XsdValidatorTestCase): self.check_stop(model) +class TestModelValidation11(TestModelValidation): + schema_class = XMLSchema11 + + class TestModelBasedSorting(XsdValidatorTestCase): def test_sort_content(self): diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index d20fa48..3595941 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -681,9 +681,12 @@ class XsdAssertionFacet(XsdFacet): def __call__(self, value): self.parser.variables['value'] = value - if not self.token.evaluate(): - msg = "value is not true with test path %r." - yield XMLSchemaValidationError(self, value, reason=msg % self.path) + try: + if not self.token.evaluate(): + msg = "value is not true with test path %r." + yield XMLSchemaValidationError(self, value, reason=msg % self.path) + except ElementPathError as err: + yield XMLSchemaValidationError(self, value, reason=str(err)) XSD_10_FACETS_BUILDERS = { diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 790294a..bc6afd5 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -193,6 +193,8 @@ class XsdIdentity(XsdComponent): # Change the XSD context only if the path is changed current_path = path xsd_element = self.parent.find(path) + if not hasattr(xsd_element, 'tag'): + yield XMLSchemaValidationError(self, e, "{!r} is not an element".format(xsd_element)) xsd_fields = self.get_fields(xsd_element) if all(fld is None for fld in xsd_fields): diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index ac64398..47ae825 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -50,7 +50,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): # Parse namespace and 
processContents namespace = self.elem.get('namespace', '##any').strip() - if namespace == '##any': + if namespace == '##any' or namespace == '': pass elif namespace == '##other': self.namespace = [namespace] From 33b6db54e993f659e6422f95799fe92f06628d7b Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 3 Sep 2019 18:42:55 +0200 Subject: [PATCH 04/36] Refactoring of ModelVisitor and Xsd11AnyElement - Modify ModelVisitor for using XSD 1.1 xs:all groups - Remove inverted list from _subgroups item - Use occurs to store sum of xs:all items - Add precedences to Xsd11AnyElement for managing cases of overlapping wildcard (overlap with an adjacent element that also matches an item). --- .../tests/validators/test_complex_types.py | 26 ++++ xmlschema/validators/assertions.py | 3 +- xmlschema/validators/elements.py | 11 ++ xmlschema/validators/models.py | 113 +++++++++++------- xmlschema/validators/wildcards.py | 18 ++- 5 files changed, 126 insertions(+), 45 deletions(-) diff --git a/xmlschema/tests/validators/test_complex_types.py b/xmlschema/tests/validators/test_complex_types.py index 001599d..6f65c3b 100644 --- a/xmlschema/tests/validators/test_complex_types.py +++ b/xmlschema/tests/validators/test_complex_types.py @@ -273,6 +273,32 @@ class TestXsdComplexType(XsdValidatorTestCase): """) + def test_upa_violation_with_wildcard(self): + self.check_schema(""" + + + + + + + + + + + + + + + + + + + + + + """, XMLSchemaModelError if self.schema_class.XSD_VERSION == '1.0' else None) + class TestXsd11ComplexType(TestXsdComplexType): diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 7609c02..d7e3746 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -80,7 +80,8 @@ class XsdAssert(XsdComponent, ElementPathMixin): self.parse_error(err, elem=self.elem) self.token = self.parser.parse('true()') - def __call__(self, elem): + def __call__(self, elem, value=None): + self.parser.variables['value'] = 
value if not self.token.evaluate(XPathContext(root=elem)): msg = "expression is not true with test path %r." yield XMLSchemaValidationError(self, obj=elem, reason=msg % self.path) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 4da22f2..e27dd12 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -511,6 +511,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) text = self.default if not xsd_type.is_simple(): + for assertion in xsd_type.assertions: + for error in assertion(elem, value=text): + yield self.validation_error(validation, error, **kwargs) + xsd_type = xsd_type.content_type if text is None: @@ -887,6 +891,13 @@ class Xsd11Element(XsdElement): for e in self.iter_substitutes(): if other.name == e.name or any(x is e for x in other.iter_substitutes()): return True + + elif isinstance(other, XsdAnyElement): + if other.is_matching(self.name, self.default_namespace): + return True + for e in self.maps.substitution_groups.get(self.name, ()): + if other.is_matching(e.name, self.default_namespace): + return True return False def is_consistent(self, other, strict=True): diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py index 6e4c8ae..e09ea7b 100644 --- a/xmlschema/validators/models.py +++ b/xmlschema/validators/models.py @@ -18,6 +18,7 @@ from ..compat import PY3, MutableSequence from ..exceptions import XMLSchemaValueError from .exceptions import XMLSchemaModelError, XMLSchemaModelDepthError from .xsdbase import ParticleMixin +from .wildcards import XsdAnyElement, Xsd11AnyElement MAX_MODEL_DEPTH = 15 """Limit depth for safe visiting of models""" @@ -233,15 +234,27 @@ class ModelGroup(MutableSequence, ParticleMixin): continue elif pe.parent is e.parent: if pe.parent.model in {'all', 'choice'}: - msg = "{!r} and {!r} overlap and are in the same {!r} group" - raise XMLSchemaModelError(self, msg.format(pe, e, pe.parent.model)) + if 
isinstance(pe, Xsd11AnyElement) and not isinstance(e, XsdAnyElement): + pe.add_precedence(e, self) + elif isinstance(e, Xsd11AnyElement) and not isinstance(pe, XsdAnyElement): + e.add_precedence(pe, self) + else: + msg = "{!r} and {!r} overlap and are in the same {!r} group" + raise XMLSchemaModelError(self, msg.format(pe, e, pe.parent.model)) elif pe.min_occurs == pe.max_occurs: continue - if not distinguishable_paths(previous_path + [pe], current_path + [e]): + if distinguishable_paths(previous_path + [pe], current_path + [e]): + continue + elif isinstance(pe, Xsd11AnyElement) and not isinstance(e, XsdAnyElement): + pe.add_precedence(e, self) + elif isinstance(e, Xsd11AnyElement) and not isinstance(pe, XsdAnyElement): + e.add_precedence(pe, self) + else: raise XMLSchemaModelError( self, "Unique Particle Attribution violation between {!r} and {!r}".format(pe, e) ) + paths[e.name] = e, current_path[:] @@ -322,8 +335,7 @@ class ModelVisitor(MutableSequence): :ivar occurs: the Counter instance for keeping track of occurrences of XSD elements and groups. :ivar element: the current XSD element, initialized to the first element of the model. :ivar group: the current XSD model group, initialized to *root* argument. - :ivar iterator: the current XSD group iterator. - :ivar items: the current XSD group unmatched items. + :ivar items: the current XSD group's items iterator. :ivar match: if the XSD group has an effective item match. 
""" def __init__(self, root): @@ -331,7 +343,7 @@ class ModelVisitor(MutableSequence): self.occurs = Counter() self._subgroups = [] self.element = None - self.group, self.iterator, self.items, self.match = root, iter(root), root[::-1], False + self.group, self.items, self.match = root, iter(root), False self._start() def __str__(self): @@ -367,17 +379,17 @@ class ModelVisitor(MutableSequence): del self._subgroups[:] self.occurs.clear() self.element = None - self.group, self.iterator, self.items, self.match = self.root, iter(self.root), self.root[::-1], False + self.group, self.items, self.match = self.root, iter(self.root), False def _start(self): while True: - item = next(self.iterator, None) + item = next(self.items, None) if item is None or not isinstance(item, ModelGroup): self.element = item break elif item: - self.append((self.group, self.iterator, self.items, self.match)) - self.group, self.iterator, self.items, self.match = item, iter(item), item[::-1], False + self.append((self.group, self.items, self.match)) + self.group, self.items, self.match = item, iter(item), False @property def expected(self): @@ -385,12 +397,19 @@ class ModelVisitor(MutableSequence): Returns the expected elements of the current and descendant groups. 
""" expected = [] - for item in reversed(self.items): - if isinstance(item, ModelGroup): - expected.extend(item.iter_elements()) + if self.group.model == 'choice': + items = self.group + elif self.group.model == 'all': + items = (e for e in self.group if e.min_occurs > self.occurs[e]) + else: + items = (e for e in self.group if e.min_occurs > self.occurs[e]) + + for e in items: + if isinstance(e, ModelGroup): + expected.extend(e.iter_elements()) else: - expected.append(item) - expected.extend(item.maps.substitution_groups.get(item.name, ())) + expected.append(e) + expected.extend(e.maps.substitution_groups.get(e.name, ())) return expected def restart(self): @@ -417,7 +436,7 @@ class ModelVisitor(MutableSequence): or for the current group, `False` otherwise. """ if isinstance(item, ModelGroup): - self.group, self.iterator, self.items, self.match = self.pop() + self.group, self.items, self.match = self.pop() item_occurs = occurs[item] model = self.group.model @@ -426,29 +445,21 @@ class ModelVisitor(MutableSequence): if model == 'choice': occurs[item] = 0 occurs[self.group] += 1 - self.iterator, self.match = iter(self.group), False - else: - if model == 'all': - self.items.remove(item) - else: - self.items.pop() - if not self.items: - self.occurs[self.group] += 1 + self.items, self.match = iter(self.group), False + elif model == 'sequence' and item is self.group[-1]: + self.occurs[self.group] += 1 return item.is_missing(item_occurs) elif model == 'sequence': if self.match: - self.items.pop() - if not self.items: + if item is self.group[-1]: occurs[self.group] += 1 return not item.is_emptiable() elif item.is_emptiable(): - self.items.pop() return False elif self.group.min_occurs <= occurs[self.group] or self: return stop_item(self.group) else: - self.items.pop() return True element, occurs = self.element, self.occurs @@ -460,6 +471,8 @@ class ModelVisitor(MutableSequence): self.match = True if not element.is_over(occurs[element]): return + + obj = None try: if 
stop_item(element): yield element, occurs[element], [element] @@ -468,35 +481,51 @@ class ModelVisitor(MutableSequence): while self.group.is_over(occurs[self.group]): stop_item(self.group) - obj = next(self.iterator, None) + obj = next(self.items, None) if obj is None: if not self.match: - if self.group.model == 'all' and all(e.min_occurs == 0 for e in self.items): - occurs[self.group] += 1 + if self.group.model == 'all': + for e in self.group: + occurs[e] = occurs[(e,)] + if all(e.min_occurs <= occurs[e] for e in self.group): + occurs[self.group] = 1 group, expected = self.group, self.expected if stop_item(group) and expected: yield group, occurs[group], expected - elif not self.items: - self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False - elif self.group.model == 'all': - self.iterator, self.match = iter(self.items), False - elif all(e.min_occurs == 0 for e in self.items): - self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False - occurs[self.group] += 1 + elif self.group.model != 'all': + self.items, self.match = iter(self.group), False + elif any(not e.is_over(occurs[e]) for e in self.group): + for e in self.group: + occurs[(e,)] += occurs[e] + self.items, self.match = (e for e in self.group if not e.is_over(occurs[e])), False + else: + for e in self.group: + occurs[(e,)] += occurs[e] + occurs[self.group] = 1 elif not isinstance(obj, ModelGroup): # XsdElement or XsdAnyElement self.element, occurs[obj] = obj, 0 return else: - self.append((self.group, self.iterator, self.items, self.match)) - self.group, self.iterator, self.items, self.match = obj, iter(obj), obj[::-1], False + self.append((self.group, self.items, self.match)) + self.group, self.items, self.match = obj, iter(obj), False occurs[obj] = 0 + if obj.model == 'all': + for e in obj: + occurs[(e,)] = 0 except IndexError: + # Model visit ended self.element = None - if self.group.is_missing(occurs[self.group]) and self.items: - yield self.group, 
occurs[self.group], self.expected + if self.group.is_missing(occurs[self.group]): + if self.group.model == 'choice': + yield self.group, occurs[self.group], self.expected + elif self.group.model == 'sequence': + if obj is not None: + yield self.group, occurs[self.group], self.expected + elif any(e.min_occurs > occurs[e] for e in self.group): + yield self.group, occurs[self.group], self.expected def sort_content(self, content, restart=True): if restart: diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 47ae825..cb69afb 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -519,6 +519,8 @@ class Xsd11AnyElement(XsdAnyElement): Content: (annotation?) """ + precedences = () + def _parse(self): super(Xsd11AnyElement, self)._parse() self._parse_not_constraints() @@ -534,11 +536,15 @@ class Xsd11AnyElement(XsdAnyElement): name = '{%s}%s' % (default_namespace, name) namespace = default_namespace - if '##defined' in self.not_qname and name in self.maps.elements: + if group in self.precedences and \ + any(e.is_matching(name) for e in self.precedences[group]): + return False + elif '##defined' in self.not_qname and name in self.maps.elements: if self.maps.elements[name].schema is self.schema: return False if group and '##definedSibling' in self.not_qname: - if any(e is not self and e.match(name, default_namespace) for e in group.iter_elements()): + if any(e.is_matching(name) for e in group.iter_elements() + if not isinstance(e, XsdAnyElement)): return False return name not in self.not_qname and self.is_namespace_allowed(namespace) @@ -548,6 +554,14 @@ class Xsd11AnyElement(XsdAnyElement): xsd_element = self.matched_element(other.name, other.default_namespace) return xsd_element is None or other.is_consistent(xsd_element, False) + def add_precedence(self, other, group): + if not self.precedences: + self.precedences = {} + try: + self.precedences[group].append(other) + except KeyError: + 
self.precedences[group] = [other] + class Xsd11AnyAttribute(XsdAnyAttribute): """ From 4f5c819d0f5c12f7d24551608a7c9fd9e08d1a7f Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 4 Sep 2019 23:13:50 +0200 Subject: [PATCH 05/36] Fix XSD 1.1 wildcard matching - Add option occurs to is_matching() method - Create a custom match() method with additional option resolve - Replace matched_element() with match(name, resolve=True) --- xmlschema/converters.py | 10 ++-- xmlschema/validators/elements.py | 6 +-- xmlschema/validators/groups.py | 20 ++++--- xmlschema/validators/wildcards.py | 87 +++++++++++++++++++++++++------ xmlschema/validators/xsdbase.py | 9 ++-- 5 files changed, 94 insertions(+), 38 deletions(-) diff --git a/xmlschema/converters.py b/xmlschema/converters.py index ec761a2..5e0e90a 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -359,7 +359,7 @@ class XMLSchemaConverter(NamespaceMapper): else: ns_name = self.unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.matched_element(ns_name) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) @@ -456,7 +456,7 @@ class UnorderedConverter(XMLSchemaConverter): # `value` is a list but not a list of lists or list of dicts. 
ns_name = self.unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.matched_element(ns_name) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content_lu[self.unmap_qname(name)] = [value] @@ -576,7 +576,7 @@ class ParkerConverter(XMLSchemaConverter): content.append((ns_name, item)) else: for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.matched_element(ns_name) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) @@ -721,7 +721,7 @@ class BadgerFishConverter(XMLSchemaConverter): else: ns_name = unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.matched_element(ns_name) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) @@ -841,7 +841,7 @@ class AbderaConverter(XMLSchemaConverter): else: ns_name = unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.matched_element(ns_name) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index e27dd12..8ec63b0 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -659,7 +659,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) return True return False - def match(self, name, default_namespace=None, group=None): + def match(self, name, default_namespace=None, **kwargs): if default_namespace and name[0] != '{': name = '{%s}%s' % (default_namespace, name) @@ -670,8 +670,6 @@ 
class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) if name in xsd_element.names: return xsd_element - matched_element = match - def is_restriction(self, other, check_occurs=True): if isinstance(other, XsdAnyElement): if self.min_occurs == self.max_occurs == 0: @@ -904,7 +902,7 @@ class Xsd11Element(XsdElement): if isinstance(other, XsdAnyElement): if other.process_contents == 'skip': return True - xsd_element = other.matched_element(self.name, self.default_namespace) + xsd_element = other.match(self.name, self.default_namespace, resolve=True) return xsd_element is None or self.is_consistent(xsd_element, False) if self.name == other.name: diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index d295fae..9f12f9f 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -538,11 +538,13 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if callable(child.tag): continue # child is a - if self.interleave and self.interleave.is_matching(child.tag, default_namespace, self): + if self.interleave and self.interleave.is_matching(child.tag, default_namespace, group=self): xsd_element = self.interleave else: while model.element is not None: - xsd_element = model.element.match(child.tag, default_namespace, self) + xsd_element = model.element.match( + child.tag, default_namespace, group=self, occurs=model.occurs + ) if xsd_element is None: for particle, occurs, expected in model.advance(False): errors.append((index, particle, occurs, expected)) @@ -557,11 +559,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): errors.append((index, particle, occurs, expected)) break else: - if self.suffix and self.suffix.is_matching(child.tag, default_namespace, self): + if self.suffix and self.suffix.is_matching(child.tag, default_namespace, group=self): xsd_element = self.suffix else: for xsd_element in self.iter_elements(): - if xsd_element.is_matching(child.tag, default_namespace, self): 
+ if xsd_element.is_matching(child.tag, default_namespace, group=self): if not model_broken: errors.append((index, xsd_element, 0, [])) model_broken = True @@ -653,12 +655,14 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): cdata_index += 1 continue - if self.interleave and self.interleave.is_matching(name, default_namespace, self): + if self.interleave and self.interleave.is_matching(name, default_namespace, group=self): xsd_element = self.interleave value = get_qname(default_namespace, name), value else: while model.element is not None: - xsd_element = model.element.match(name, default_namespace, self) + xsd_element = model.element.match( + name, default_namespace, group=self, occurs=model.occurs + ) if xsd_element is None: for particle, occurs, expected in model.advance(): errors.append((index - cdata_index, particle, occurs, expected)) @@ -670,13 +674,13 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): errors.append((index - cdata_index, particle, occurs, expected)) break else: - if self.suffix and self.suffix.is_matching(name, default_namespace, self): + if self.suffix and self.suffix.is_matching(name, default_namespace, group=self): xsd_element = self.suffix value = get_qname(default_namespace, name), value else: errors.append((index - cdata_index, self, 0, [])) for xsd_element in self.iter_elements(): - if not xsd_element.is_matching(name, default_namespace, self): + if not xsd_element.is_matching(name, default_namespace, group=self): continue elif isinstance(xsd_element, XsdAnyElement): value = get_qname(default_namespace, name), value diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index cb69afb..7666d0f 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -150,7 +150,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): def built(self): return True - def is_matching(self, name, default_namespace=None, group=None): + def is_matching(self, name, 
default_namespace=None, **kwargs): if name is None: return False elif not name or name[0] == '{': @@ -342,15 +342,30 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): def is_emptiable(self): return self.min_occurs == 0 or self.process_contents != 'strict' - def matched_element(self, name, default_namespace=None, group=None): - if self.is_matching(name, default_namespace, group): - try: - if name[0] != '{' and default_namespace: - return self.maps.lookup_element('{%s}%s' % (default_namespace, name)) - else: - return self.maps.lookup_element(name) - except LookupError: - pass + def match(self, name, default_namespace=None, resolve=False, **kwargs): + """ + Returns the element wildcard if name is matching the name provided + as argument, `None` otherwise. + + :param name: a local or fully-qualified name. + :param default_namespace: used when it's not `None` and not empty for \ + completing local name arguments. + :param resolve: when `True` it doesn't return the wildcard but try to \ + resolve and return the element matching the name. + :param kwargs: additional options used by XSD 1.1 xs:any wildcards. + """ + if not self.is_matching(name, default_namespace, **kwargs): + return + elif not resolve: + return self + + try: + if name[0] != '{' and default_namespace: + return self.maps.lookup_element('{%s}%s' % (default_namespace, name)) + else: + return self.maps.lookup_element(name) + except LookupError: + pass def __iter__(self): return iter(()) @@ -457,6 +472,31 @@ class XsdAnyAttribute(XsdWildcard): """ _ADMITTED_TAGS = {XSD_ANY_ATTRIBUTE} + def match(self, name, default_namespace=None, resolve=False, **kwargs): + """ + Returns the attribute wildcard if name is matching the name provided + as argument, `None` otherwise. + + :param name: a local or fully-qualified name. + :param default_namespace: used when it's not `None` and not empty for \ + completing local name arguments. 
+ :param resolve: when `True` it doesn't return the wildcard but try to \ + resolve and return the attribute matching the name. + :param kwargs: additional options that can be used by certain components. + """ + if not self.is_matching(name, default_namespace, **kwargs): + return + elif not resolve: + return self + + try: + if name[0] != '{' and default_namespace: + return self.maps.lookup_attribute('{%s}%s' % (default_namespace, name)) + else: + return self.maps.lookup_attribute(name) + except LookupError: + pass + def iter_decode(self, attribute, validation='lax', **kwargs): if self.process_contents == 'skip': return @@ -525,7 +565,18 @@ class Xsd11AnyElement(XsdAnyElement): super(Xsd11AnyElement, self)._parse() self._parse_not_constraints() - def is_matching(self, name, default_namespace=None, group=None): + def is_matching(self, name, default_namespace=None, group=None, occurs=None): + """ + Returns `True` if the component name is matching the name provided as argument, + `False` otherwise. For XSD elements the matching is extended to substitutes. + + :param name: a local or fully-qualified name. + :param default_namespace: used if it's not None and not empty for completing \ + the name argument in case it's a local name. + :param group: used only by XSD 1.1 any element wildcards to verify siblings in \ + case of ##definedSibling value in notQName attribute. + :param occurs: a Counter instance for verify model occurrences counting. 
+ """ if name is None: return False elif not name or name[0] == '{': @@ -536,10 +587,14 @@ class Xsd11AnyElement(XsdAnyElement): name = '{%s}%s' % (default_namespace, name) namespace = default_namespace - if group in self.precedences and \ - any(e.is_matching(name) for e in self.precedences[group]): - return False - elif '##defined' in self.not_qname and name in self.maps.elements: + if group in self.precedences: + if not occurs: + if any(e.is_matching(name) for e in self.precedences[group]): + return False + elif any(e.is_matching(name) and not e.is_over(occurs[e]) for e in self.precedences[group]): + return False + + if '##defined' in self.not_qname and name in self.maps.elements: if self.maps.elements[name].schema is self.schema: return False if group and '##definedSibling' in self.not_qname: @@ -551,7 +606,7 @@ class Xsd11AnyElement(XsdAnyElement): def is_consistent(self, other): if isinstance(other, XsdAnyElement) or self.process_contents == 'skip': return True - xsd_element = self.matched_element(other.name, other.default_namespace) + xsd_element = self.match(other.name, other.default_namespace, resolve=True) return xsd_element is None or other.is_consistent(xsd_element, False) def add_precedence(self, other, group): diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index 044ce50..10dcb78 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -427,7 +427,7 @@ class XsdComponent(XsdValidator): def built(self): raise NotImplementedError - def is_matching(self, name, default_namespace=None, group=None): + def is_matching(self, name, default_namespace=None, **kwargs): """ Returns `True` if the component name is matching the name provided as argument, `False` otherwise. For XSD elements the matching is extended to substitutes. @@ -435,8 +435,7 @@ class XsdComponent(XsdValidator): :param name: a local or fully-qualified name. 
:param default_namespace: used if it's not None and not empty for completing the name \ argument in case it's a local name. - :param group: used only by XSD 1.1 any element wildcards to verify siblings in \ - case of ##definedSibling value in notQName attribute. + :param kwargs: additional options that can be used by certain components. """ if not name: return self.name == name @@ -448,9 +447,9 @@ class XsdComponent(XsdValidator): qname = '{%s}%s' % (default_namespace, name) return self.qualified_name == qname or not self.qualified and self.local_name == name - def match(self, name, default_namespace=None, group=None): + def match(self, name, default_namespace=None, **kwargs): """Returns the component if its name is matching the name provided as argument, `None` otherwise.""" - return self if self.is_matching(name, default_namespace, group) else None + return self if self.is_matching(name, default_namespace, **kwargs) else None def get_global(self): """Returns the global XSD component that contains the component instance.""" From c530fda102881f8f54d9ecc9f8f31b290c03ab3f Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 6 Sep 2019 06:42:34 +0200 Subject: [PATCH 06/36] Fix openContent validation and regex character group parsing - Put interleave mode after model match for precedence - Fix regex parsing of character group ending with '-' - Fix complexType mixed content extension --- xmlschema/codepoints.py | 4 +- xmlschema/tests/test_regex.py | 4 + xmlschema/validators/complex_types.py | 259 ++++++++++++++++---------- xmlschema/validators/groups.py | 177 +++++++++--------- xmlschema/validators/schema.py | 25 ++- xmlschema/validators/wildcards.py | 2 +- 6 files changed, 273 insertions(+), 198 deletions(-) diff --git a/xmlschema/codepoints.py b/xmlschema/codepoints.py index f38c299..efbbc9e 100644 --- a/xmlschema/codepoints.py +++ b/xmlschema/codepoints.py @@ -194,7 +194,7 @@ def iterparse_character_group(s, expand_ranges=False): raise XMLSchemaRegexError("bad
character %r at position %d" % (s[k], k)) escaped = on_range = False char = s[k] - if k >= length - 1 or s[k + 1] != '-': + if k >= length - 2 or s[k + 1] != '-': yield ord(char) elif s[k] == '\\': if escaped: @@ -209,7 +209,7 @@ def iterparse_character_group(s, expand_ranges=False): yield ord('\\') on_range = False char = s[k] - if k >= length - 1 or s[k + 1] != '-': + if k >= length - 2 or s[k + 1] != '-': yield ord(char) if escaped: yield ord('\\') diff --git a/xmlschema/tests/test_regex.py b/xmlschema/tests/test_regex.py index 2ea8f7d..b5bb6c8 100644 --- a/xmlschema/tests/test_regex.py +++ b/xmlschema/tests/test_regex.py @@ -390,6 +390,10 @@ class TestPatterns(unittest.TestCase): self.assertEqual(regex, r'^([^\w\W])$') self.assertRaises(XMLSchemaRegexError, get_python_regex, '[]') + def test_character_class_range(self): + regex = get_python_regex('[bc-]') + self.assertEqual(regex, r'^([\-bc])$') + if __name__ == '__main__': from xmlschema.tests import print_test_header diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index a7362ef..ce6a12a 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -16,7 +16,6 @@ from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENC XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, \ XSD_OPEN_CONTENT, XSD_ASSERT from ..helpers import get_qname, local_name, get_xsd_derivation_attribute -from ..etree import etree_element from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError from .xsdbase import XsdType, ValidationMixin @@ -28,8 +27,6 @@ from .wildcards import XsdOpenContent XSD_MODEL_GROUP_TAGS = {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE} -SEQUENCE_ELEMENT = etree_element(XSD_SEQUENCE) - class XsdComplexType(XsdType, ValidationMixin): """ @@ -137,14 +134,10 @@ class XsdComplexType(XsdType, ValidationMixin): content_elem = self._parse_child_component(elem, 
strict=False) if content_elem is None or content_elem.tag in self._CONTENT_TAIL_TAGS: - # - # complexType with empty content - self.content_type = self.schema.BUILDERS.group_class(SEQUENCE_ELEMENT, self.schema, self) + self.content_type = self.schema.create_empty_content_group(self) self._parse_content_tail(elem) elif content_elem.tag in {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: - # - # complexType with child elements self.content_type = self.schema.BUILDERS.group_class(content_elem, self.schema, self) self._parse_content_tail(elem) @@ -202,7 +195,7 @@ class XsdComplexType(XsdType, ValidationMixin): self.open_content = XsdOpenContent(content_elem, self.schema, self) if content_elem is elem[-1]: - self.content_type = self.schema.BUILDERS.group_class(SEQUENCE_ELEMENT, self.schema, self) + self.content_type = self.schema.create_empty_content_group(self) else: for index, child in enumerate(elem): if content_elem is not child: @@ -210,7 +203,7 @@ class XsdComplexType(XsdType, ValidationMixin): elif elem[index + 1].tag in {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: self.content_type = self.schema.BUILDERS.group_class(elem[index + 1], self.schema, self) else: - self.content_type = self.schema.BUILDERS.group_class(SEQUENCE_ELEMENT, self.schema, self) + self.content_type = self.schema.self.schema.create_empty_content_group(self) break self._parse_content_tail(elem) @@ -340,9 +333,7 @@ class XsdComplexType(XsdType, ValidationMixin): self.parse_error(msg.format(base_type.content_type.model, content_type.model)) break else: - # Empty content model - content_type = self.schema.BUILDERS.group_class(elem, self.schema, self) - content_type.model = base_type.content_type.model + content_type = self.schema.create_empty_content_group(self, base_type.content_type.model) if base_type.is_element_only() and content_type.mixed: self.parse_error( @@ -371,101 +362,74 @@ class XsdComplexType(XsdType, ValidationMixin): if 'extension' in base_type.final: self.parse_error("the base 
type is not derivable by extension") - # Parse openContent for group_elem in filter(lambda x: x.tag != XSD_ANNOTATION, elem): - if group_elem.tag != XSD_OPEN_CONTENT: - break - self.open_content = XsdOpenContent(group_elem, self.schema, self) - try: - self.open_content.any_element.extend(base_type.open_content.any_element) - except AttributeError: - pass + break else: group_elem = None - if not self.open_content: - if self.schema.default_open_content: - self.open_content = self.schema.default_open_content - elif getattr(base_type, 'open_content', None): - self.open_content = base_type.open_content - - try: - if self.open_content and not base_type.open_content.is_restriction(self.open_content): - msg = "{!r} is not an extension of the base type {!r}" - self.parse_error(msg.format(self.open_content, base_type.open_content)) - except AttributeError: - pass - if base_type.is_empty(): - # Empty model extension: don't create a nested group. - if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: - self.content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) - else: - # Empty content model - self.content_type = self.schema.BUILDERS.group_class(elem, self.schema, self) - else: - # Create a dummy sequence content type if the base type has not empty content model - sequence_elem = etree_element(XSD_SEQUENCE) - sequence_elem.text = '\n ' - content_type = self.schema.BUILDERS.group_class(sequence_elem, self.schema, self) + if not base_type.mixed: + # Empty element-only model extension: don't create a nested group. 
+ if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + self.content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + elif base_type.is_simple() or base_type.has_simple_content(): + self.content_type = self.schema.create_empty_content_group(self) + else: + self.content_type = self.schema.create_empty_content_group( + parent=self, model=base_type.content_type.model + ) + elif base_type.mixed: + # Empty mixed model extension + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(self.schema.create_empty_content_group(self.content_type)) - if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: - # Illegal derivation from a simple content. Always forbidden in XSD 1.1 - # for XSD 1.0 applies only with not empty base and not empty extension. - if base_type.is_simple() or base_type.has_simple_content() and self.xsd_version == '1.0': - self.parse_error("base %r is simple or has a simple content." % base_type, elem) - base_type = self.maps.types[XSD_ANY_TYPE] + if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self.content_type) + if not self.mixed: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." 
% base_type.mixed, elem) + else: + group = self.schema.create_empty_content_group(self) - group = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + self.content_type.append(group) + self.content_type.elem.append(base_type.content_type.elem) + self.content_type.elem.append(group.elem) - if self.xsd_version == '1.0': - if group.model == 'all': - self.parse_error("cannot extend a complex content with xs:all") - if base_type.content_type.model == 'all' and group.model == 'sequence': - self.parse_error("xs:sequence cannot extend xs:all") + elif group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + # Derivation from a simple content is forbidden if base type is not empty. + if base_type.is_simple() or base_type.has_simple_content(): + self.parse_error("base %r is simple or has a simple content." % base_type, elem) + base_type = self.any_type - elif base_type.content_type.model == 'all': - if group.model == 'sequence': - self.parse_error("xs:sequence cannot extend xs:all") - elif group.model == 'all': - if base_type.content_type.min_occurs != group.min_occurs: - self.parse_error( - "when xs:all extends xs:all the minOccurs must be the same" - ) - if base_type.content_type.mixed and not base_type.content_type: - self.parse_error( - "xs:all cannot extend an xs:all with mixed empty content" - ) + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self) - elif base_type.content_type.model == 'sequence': - if group.model == 'all': - self.parse_error("xs:all cannot extend a not empty xs:sequence") - elif group.model == 'all': - self.parse_error("xs:all cannot extend a not empty xs:choice") + if group.model == 'all': + self.parse_error("cannot extend a complex content with xs:all") + if base_type.content_type.model == 'all' and group.model == 'sequence': + self.parse_error("xs:sequence cannot extend xs:all") - content_type.append(base_type.content_type) - content_type.append(group) - sequence_elem.append(base_type.content_type.elem) 
- sequence_elem.append(group.elem) - - if base_type.content_type.model == 'all' and base_type.content_type and group: - if self.xsd_version == '1.0': - self.parse_error("XSD 1.0 does not allow extension of a not empty 'all' model group") - elif group.model != 'all': - self.parse_error("cannot extend a not empty 'all' model group with a different model") - - if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE: - self.parse_error("base has a different content type (mixed=%r) and the " - "extension group is not empty." % base_type.mixed, elem) - - elif not base_type.is_simple() and not base_type.has_simple_content(): - content_type.append(base_type.content_type) - sequence_elem.append(base_type.content_type.elem) - if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE and self.mixed: - self.parse_error("extended type has a mixed content but the base is element-only", elem) + content_type = self.schema.create_empty_content_group(self) + content_type.append(base_type.content_type) + content_type.append(group) + content_type.elem.append(base_type.content_type.elem) + content_type.elem.append(group.elem) + if base_type.content_type.model == 'all' and base_type.content_type and group: + self.parse_error("XSD 1.0 does not allow extension of a not empty 'all' model group") + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." 
% base_type.mixed, elem) self.content_type = content_type + elif not base_type.is_simple() and not base_type.has_simple_content(): + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(base_type.content_type) + self.content_type.elem.append(base_type.content_type.elem) + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE and self.mixed: + self.parse_error("extended type has a mixed content but the base is element-only", elem) + else: + self.content_type = self.schema.create_empty_content_group(self) + self._parse_content_tail(elem, derivation='extension', base_attributes=base_type.attributes) @property @@ -752,8 +716,111 @@ class Xsd11ComplexType(XsdComplexType): # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#sec-cos-ct-extends if base_type.is_simple() or base_type.has_simple_content(): self.parse_error("base %r is simple or has a simple content." % base_type, elem) - base_type = self.maps.types[XSD_ANY_TYPE] - super(Xsd11ComplexType, self)._parse_complex_content_extension(elem, base_type) + base_type = self.any_type + + if 'extension' in base_type.final: + self.parse_error("the base type is not derivable by extension") + + # Parse openContent + for group_elem in filter(lambda x: x.tag != XSD_ANNOTATION, elem): + if group_elem.tag != XSD_OPEN_CONTENT: + break + self.open_content = XsdOpenContent(group_elem, self.schema, self) + try: + self.open_content.any_element.extend(base_type.open_content.any_element) + except AttributeError: + pass + else: + group_elem = None + + if not self.open_content: + if self.schema.default_open_content: + self.open_content = self.schema.default_open_content + elif getattr(base_type, 'open_content', None): + self.open_content = base_type.open_content + + try: + if self.open_content and not base_type.open_content.is_restriction(self.open_content): + msg = "{!r} is not an extension of the base type {!r}" + self.parse_error(msg.format(self.open_content, 
base_type.open_content)) + except AttributeError: + pass + + if not base_type.content_type: + if not base_type.mixed: + # Empty element-only model extension: don't create a nested sequence group. + if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + self.content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + else: + self.content_type = self.schema.create_empty_content_group( + parent=self, model=base_type.content_type.model + ) + elif base_type.mixed: + # Empty mixed model extension + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(self.schema.create_empty_content_group(self.content_type)) + + if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self.content_type) + if not self.mixed: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." % base_type.mixed, elem) + if group.model == 'all': + self.parse_error("cannot extend an empty mixed content with an xs:all") + else: + group = self.schema.create_empty_content_group(self) + + self.content_type.append(group) + self.content_type.elem.append(base_type.content_type.elem) + self.content_type.elem.append(group.elem) + + elif group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + + if base_type.content_type.model != 'all': + content_type = self.schema.create_empty_content_group(self) + content_type.append(base_type.content_type) + content_type.elem.append(base_type.content_type.elem) + + if group.model == 'all': + msg = "xs:all cannot extend a not empty xs:%s" + self.parse_error(msg % base_type.content_type.model) + else: + content_type.append(group) + content_type.elem.append(group.elem) + else: + content_type = self.schema.create_empty_content_group(self, model='all') + 
content_type.extend(base_type.content_type) + content_type.elem.extend(base_type.content_type.elem) + + if not group: + pass + elif group.model != 'all': + self.parse_error("cannot extend a not empty 'all' model group with a different model") + elif base_type.content_type.min_occurs != group.min_occurs: + self.parse_error("when extend an xs:all group minOccurs must be the same") + elif base_type.mixed and not base_type.content_type: + self.parse_error("cannot extend an xs:all group with mixed empty content") + else: + content_type.extend(group) + content_type.elem.extend(group.elem) + + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." % base_type.mixed, elem) + + self.content_type = content_type + + elif not base_type.is_simple() and not base_type.has_simple_content(): + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(base_type.content_type) + self.content_type.elem.append(base_type.content_type.elem) + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE and self.mixed: + self.parse_error("extended type has a mixed content but the base is element-only", elem) + else: + self.content_type = self.schema.create_empty_content_group(self) + + self._parse_content_tail(elem, derivation='extension', base_attributes=base_type.attributes) def _parse_content_tail(self, elem, **kwargs): self.attributes = self.schema.BUILDERS.attribute_group_class(elem, self.schema, self, **kwargs) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 9f12f9f..0fafa90 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -16,8 +16,8 @@ from __future__ import unicode_literals from ..compat import unicode_type from ..exceptions import XMLSchemaValueError from ..etree import etree_element -from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, 
XSD_ALL, XSD_CHOICE, \ - XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_ANY, XSD_RESTRICTION, XSD_EXTENSION +from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ + XSD_CHOICE, XSD_ELEMENT, XSD_ANY from xmlschema.helpers import get_qname, local_name from .exceptions import XMLSchemaValidationError, XMLSchemaChildrenValidationError @@ -80,9 +80,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): interleave = None # an Xsd11AnyElement in case of XSD 1.1 openContent with mode='interleave' suffix = None # an Xsd11AnyElement in case of openContent with mode='suffix' or 'interleave' - _ADMITTED_TAGS = { - XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE - } + _ADMITTED_TAGS = {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE} def __init__(self, elem, schema, parent): self._group = [] @@ -114,49 +112,53 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): def _parse(self): super(XsdGroup, self)._parse() self.clear() - elem = self.elem - self._parse_particle(elem) + self._parse_particle(self.elem) - if elem.tag == XSD_GROUP: - # Global group or reference - if self._parse_reference(): - try: - xsd_group = self.schema.maps.lookup_group(self.name) - except KeyError: - self.parse_error("missing group %r" % self.prefixed_name) - xsd_group = self.schema.create_any_content_group(self, self.name) - - if isinstance(xsd_group, tuple): - # Disallowed circular definition, substitute with any content group. 
- self.parse_error("Circular definitions detected for group %r:" % self.name, xsd_group[0]) - self.model = 'sequence' - self.mixed = True - self.append(self.schema.BUILDERS.any_element_class(ANY_ELEMENT, self.schema, self)) - else: - self.model = xsd_group.model - if self.model == 'all': - if self.max_occurs != 1: - self.parse_error("maxOccurs must be 1 for 'all' model groups") - if self.min_occurs not in (0, 1): - self.parse_error("minOccurs must be (0 | 1) for 'all' model groups") - if self.xsd_version == '1.0' and isinstance(self.parent, XsdGroup): - self.parse_error("in XSD 1.0 the 'all' model group cannot be nested") - self.append(xsd_group) - self.ref = xsd_group - return + if self.elem.tag != XSD_GROUP: + # Local group (sequence|all|choice) + if 'name' in self.elem.attrib: + self.parse_error("attribute 'name' not allowed for a local group") + self._parse_content_model(self.elem) + elif self._parse_reference(): try: - self.name = get_qname(self.target_namespace, elem.attrib['name']) + xsd_group = self.schema.maps.lookup_group(self.name) except KeyError: - return + self.parse_error("missing group %r" % self.prefixed_name) + xsd_group = self.schema.create_any_content_group(self, self.name) + + if isinstance(xsd_group, tuple): + # Disallowed circular definition, substitute with any content group. 
+ self.parse_error("Circular definitions detected for group %r:" % self.name, xsd_group[0]) + self.model = 'sequence' + self.mixed = True + self.append(self.schema.BUILDERS.any_element_class(ANY_ELEMENT, self.schema, self)) else: - content_model = self._parse_child_component(elem, strict=True) + self.model = xsd_group.model + if self.model == 'all': + if self.max_occurs != 1: + self.parse_error("maxOccurs must be 1 for 'all' model groups") + if self.min_occurs not in (0, 1): + self.parse_error("minOccurs must be (0 | 1) for 'all' model groups") + if self.xsd_version == '1.0' and isinstance(self.parent, XsdGroup): + self.parse_error("in XSD 1.0 the 'all' model group cannot be nested") + self.append(xsd_group) + self.ref = xsd_group + + else: + attrib = self.elem.attrib + try: + self.name = get_qname(self.target_namespace, attrib['name']) + except KeyError: + pass + else: + content_model = self._parse_child_component(self.elem, strict=True) if self.parent is not None: self.parse_error("attribute 'name' not allowed for a local group") else: - if 'minOccurs' in elem.attrib: + if 'minOccurs' in attrib: self.parse_error("attribute 'minOccurs' not allowed for a global group") - if 'maxOccurs' in elem.attrib: + if 'maxOccurs' in attrib: self.parse_error("attribute 'maxOccurs' not allowed for a global group") if 'minOccurs' in content_model.attrib: self.parse_error( @@ -166,26 +168,13 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): self.parse_error( "attribute 'maxOccurs' not allowed for the model of a global group", content_model ) - if content_model.tag not in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: - self.parse_error('unexpected tag %r' % content_model.tag, content_model) - return - elif elem.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: - # Local group (sequence|all|choice) - if 'name' in elem.attrib: - self.parse_error("attribute 'name' not allowed for a local group") - content_model = elem - self.name = None - elif elem.tag in {XSD_COMPLEX_TYPE, 
XSD_EXTENSION, XSD_RESTRICTION}: - self.name = self.model = None - return - else: - self.parse_error('unexpected tag %r' % elem.tag) - return + if content_model.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: + self._parse_content_model(content_model) + else: + self.parse_error('unexpected tag %r' % content_model.tag, content_model) - self._parse_content_model(elem, content_model) - - def _parse_content_model(self, elem, content_model): + def _parse_content_model(self, content_model): self.model = local_name(content_model.tag) if self.model == 'all': if self.max_occurs != 1: @@ -198,7 +187,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): # Builds inner elements and reference groups later, for avoids circularity. self.append((child, self.schema)) elif content_model.tag == XSD_ALL: - self.parse_error("'all' model can contains only elements.", elem) + self.parse_error("'all' model can contains only elements.") elif child.tag == XSD_ANY: self.append(XsdAnyElement(child, self.schema, self)) elif child.tag in (XSD_SEQUENCE, XSD_CHOICE): @@ -220,11 +209,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: self.append(xsd_group) elif self.redefine is None: - self.parse_error("Circular definition detected for group %r:" % self.name, elem) + self.parse_error("Circular definition detected for group %r:" % self.name) else: if child.get('minOccurs', '1') != '1' or child.get('maxOccurs', '1') != '1': self.parse_error( - "Redefined group reference cannot have minOccurs/maxOccurs other than 1:", elem + "Redefined group reference cannot have minOccurs/maxOccurs other than 1:" ) self.append(self.redefine) else: @@ -538,40 +527,42 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if callable(child.tag): continue # child is a - if self.interleave and self.interleave.is_matching(child.tag, default_namespace, group=self): - xsd_element = self.interleave - else: - while model.element is not None: - xsd_element = model.element.match( - child.tag, 
default_namespace, group=self, occurs=model.occurs - ) - if xsd_element is None: - for particle, occurs, expected in model.advance(False): - errors.append((index, particle, occurs, expected)) - model.clear() - model_broken = True # the model is broken, continues with raw decoding. - break - else: - continue + while model.element is not None: + xsd_element = model.element.match( + child.tag, default_namespace, group=self, occurs=model.occurs + ) + if xsd_element is None: + if self.interleave is not None and \ + self.interleave.is_matching(child.tag, default_namespace, self, model.occurs): + xsd_element = self.interleave break - for particle, occurs, expected in model.advance(True): + for particle, occurs, expected in model.advance(False): errors.append((index, particle, occurs, expected)) - break - else: - if self.suffix and self.suffix.is_matching(child.tag, default_namespace, group=self): - xsd_element = self.suffix + model.clear() + model_broken = True # the model is broken, continues with raw decoding. 
+ break else: - for xsd_element in self.iter_elements(): - if xsd_element.is_matching(child.tag, default_namespace, group=self): - if not model_broken: - errors.append((index, xsd_element, 0, [])) - model_broken = True - break - else: - errors.append((index, self, 0, None)) - xsd_element = None - model_broken = True + continue + break + + for particle, occurs, expected in model.advance(True): + errors.append((index, particle, occurs, expected)) + break + else: + if self.suffix is not None and self.suffix.is_matching(child.tag, default_namespace, self): + xsd_element = self.suffix + else: + for xsd_element in self.iter_elements(): + if xsd_element.is_matching(child.tag, default_namespace, group=self): + if not model_broken: + errors.append((index, xsd_element, 0, [])) + model_broken = True + break + else: + errors.append((index, self, 0, None)) + xsd_element = None + model_broken = True if xsd_element is None or kwargs.get('no_depth'): # TODO: use a default decoder str-->str?? @@ -736,7 +727,7 @@ class Xsd11Group(XsdGroup): Content: (annotation?, (element | any | group)*) """ - def _parse_content_model(self, elem, content_model): + def _parse_content_model(self, content_model): self.model = local_name(content_model.tag) if self.model == 'all': if self.max_occurs not in (0, 1): @@ -770,11 +761,11 @@ class Xsd11Group(XsdGroup): self.pop() elif self.redefine is None: - self.parse_error("Circular definition detected for group %r:" % self.name, elem) + self.parse_error("Circular definition detected for group %r:" % self.name) else: if child.get('minOccurs', '1') != '1' or child.get('maxOccurs', '1') != '1': self.parse_error( - "Redefined group reference cannot have minOccurs/maxOccurs other than 1:", elem + "Redefined group reference cannot have minOccurs/maxOccurs other than 1:" ) self.append(self.redefine) else: diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 8e7f9c2..5656b80 100644 --- a/xmlschema/validators/schema.py +++ 
b/xmlschema/validators/schema.py @@ -27,9 +27,9 @@ from ..exceptions import XMLSchemaTypeError, XMLSchemaURLError, XMLSchemaKeyErro from ..qnames import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \ VC_TYPE_UNAVAILABLE, VC_FACET_AVAILABLE, VC_FACET_UNAVAILABLE, XSD_SCHEMA, \ XSD_ANNOTATION, XSD_NOTATION, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_GROUP, \ - XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_SEQUENCE, XSD_ANY, \ - XSD_ANY_ATTRIBUTE, XSD_INCLUDE, XSD_IMPORT, XSD_REDEFINE, XSD_OVERRIDE, \ - XSD_DEFAULT_OPEN_CONTENT + XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_SEQUENCE, XSD_CHOICE, \ + XSD_ALL, XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_INCLUDE, XSD_IMPORT, XSD_REDEFINE, \ + XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute from ..namespaces import XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, XHTML_NAMESPACE, \ XLINK_NAMESPACE, VC_NAMESPACE, NamespaceResourcesMap, NamespaceView @@ -644,6 +644,19 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): attribute_group[None] = self.BUILDERS.any_attribute_class(ANY_ATTRIBUTE_ELEMENT, self, attribute_group) return attribute_group + def create_empty_content_group(self, parent, model='sequence'): + if model == 'sequence': + group_elem = etree_element(XSD_SEQUENCE) + elif model == 'choice': + group_elem = etree_element(XSD_CHOICE) + elif model == 'all': + group_elem = etree_element(XSD_ALL) + else: + raise XMLSchemaValueError("'model' argument must be (sequence | choice | all)") + + group_elem.text = '\n ' + return self.BUILDERS.group_class(group_elem, self, parent) + def copy(self): """Makes a copy of the schema instance. 
The new instance has independent maps of shared XSD components.""" schema = object.__new__(self.__class__) @@ -1113,7 +1126,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): """ if not self.built: if self.meta_schema is not None: - raise XMLSchemaNotBuiltError(self, "schema %r is not built." % self) + raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) self.build() if not isinstance(source, XMLResource): @@ -1195,7 +1208,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): """ if not self.built: if self.meta_schema is not None: - raise XMLSchemaNotBuiltError(self, "schema %r is not built." % self) + raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) self.build() if validation not in XSD_VALIDATION_MODES: @@ -1272,7 +1285,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): """ if not self.built: if self.meta_schema is not None: - raise XMLSchemaNotBuiltError(self, "schema %r is not built." 
% self) + raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) self.build() if validation not in XSD_VALIDATION_MODES: diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 7666d0f..abf7793 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -588,7 +588,7 @@ class Xsd11AnyElement(XsdAnyElement): namespace = default_namespace if group in self.precedences: - if not occurs: + if occurs is None: if any(e.is_matching(name) for e in self.precedences[group]): return False elif any(e.is_matching(name) and not e.is_over(occurs[e]) for e in self.precedences[group]): From c36ef4a26a96155310db8b0c5f141efc1959fde4 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 6 Sep 2019 08:46:02 +0200 Subject: [PATCH 07/36] Copy identities dict for element references --- xmlschema/validators/elements.py | 5 +++++ xmlschema/validators/identities.py | 8 ++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 8ec63b0..73c5aa0 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -130,6 +130,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) self.type = xsd_element.type self.qualified = xsd_element.qualified + for attr_name in ('type', 'nillable', 'default', 'fixed', 'form', 'block', 'abstract', 'final', 'substitutionGroup'): if attr_name in attrib: @@ -253,6 +254,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) return 0 def _parse_identity_constraints(self, index=0): + if self.ref is not None: + self.identities = self.ref.identities + return + self.identities = {} for child in filter(lambda x: x.tag != XSD_ANNOTATION, self.elem[index:]): if child.tag == XSD_UNIQUE: diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index bc6afd5..61b84f2 100644 --- 
a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -212,11 +212,7 @@ class XsdIdentity(XsdComponent): def built(self): return self.selector is not None - def __call__(self, *args, **kwargs): - for error in self.validator(*args, **kwargs): - yield error - - def validator(self, elem): + def __call__(self, elem): values = Counter() for v in self.iter_values(elem): if isinstance(v, XMLSchemaValidationError): @@ -308,7 +304,7 @@ class XsdKeyref(XsdIdentity): values.add(v) return values - def validator(self, elem): + def __call__(self, elem): if self.refer is None: return From c35f86d6812a6c4048209baab7e26b362a5977df Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 6 Sep 2019 11:37:30 +0200 Subject: [PATCH 08/36] Add IDREF checking and fix ID collecting - ID are not counted for XML document root - IDREF checked with adding a key with 0 value if missing (TODO: Add a validation error instance or a reference) --- xmlschema/validators/elements.py | 4 +-- xmlschema/validators/schema.py | 13 ++++++++++ xmlschema/validators/simple_types.py | 39 +++++++++++++++++++--------- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 73c5aa0..301d6a7 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -467,7 +467,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) # Decode attributes attribute_group = getattr(xsd_type, 'attributes', self.attributes) - for result in attribute_group.iter_decode(elem.attrib, validation, **kwargs): + for result in attribute_group.iter_decode(elem.attrib, validation, level=level, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) else: @@ -529,7 +529,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) if 'filler' in kwargs: value = kwargs['filler'](self) else: - 
for result in xsd_type.iter_decode(text, validation, **kwargs): + for result in xsd_type.iter_decode(text, validation, level=level, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) elif result is None and 'filler' in kwargs: diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 5656b80..67e7b36 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -1172,6 +1172,13 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): else: del result + # Check unresolved IDREF values + for k, v in id_map.items(): + if isinstance(v, XMLSchemaValidationError): + yield v + elif v == 0: + yield self.validation_error('lax', "IDREF %r not found in XML document" % k, source.root) + def iter_decode(self, source, path=None, schema_path=None, validation='lax', process_namespaces=True, namespaces=None, use_defaults=True, decimal_type=None, datetime_types=False, converter=None, filler=None, fill_missing=False, **kwargs): @@ -1243,6 +1250,12 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): fill_missing=fill_missing, id_map=id_map, **kwargs): yield obj + for k, v in id_map.items(): + if isinstance(v, XMLSchemaValidationError): + yield v + elif v == 0: + yield self.validation_error('lax', "IDREF %r not found in XML document" % k, source.root) + def decode(self, source, path=None, schema_path=None, validation='strict', *args, **kwargs): """ Decodes XML data. Takes the same arguments of the method :func:`XMLSchema.iter_decode`. 
diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 2d75eae..0180a4d 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -17,19 +17,21 @@ from decimal import DecimalException from ..compat import string_base_type, unicode_type from ..etree import etree_element from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError -from ..qnames import ( - XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, - XSD_ANY_ATTRIBUTE, XSD_PATTERN, XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, - XSD_MAX_EXCLUSIVE, XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, - XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, XSD_ID, - XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, XSD_ERROR, XSD_ASSERT -) +from ..qnames import XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, \ + XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE, XSD_PATTERN, \ + XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, \ + XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, \ + XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, \ + XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, \ + XSD_ERROR, XSD_ASSERT from ..helpers import get_qname, local_name, get_xsd_derivation_attribute -from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, XMLSchemaDecodeError, XMLSchemaParseError +from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, \ + XMLSchemaDecodeError, XMLSchemaParseError from .xsdbase import XsdAnnotation, XsdType, ValidationMixin -from .facets import XsdFacet, XsdWhiteSpaceFacet, XSD_10_FACETS_BUILDERS, XSD_11_FACETS_BUILDERS, XSD_10_FACETS, \ - XSD_11_FACETS, XSD_10_LIST_FACETS, XSD_11_LIST_FACETS, XSD_10_UNION_FACETS, XSD_11_UNION_FACETS, MULTIPLE_FACETS +from .facets 
import XsdFacet, XsdWhiteSpaceFacet, XSD_10_FACETS_BUILDERS, \ + XSD_11_FACETS_BUILDERS, XSD_10_FACETS, XSD_11_FACETS, XSD_10_LIST_FACETS, \ + XSD_11_LIST_FACETS, XSD_10_UNION_FACETS, XSD_11_UNION_FACETS, MULTIPLE_FACETS def xsd_simple_type_factory(elem, schema, parent): @@ -515,16 +517,29 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.decode_error(validation, obj, self.to_python, reason="value is not an instance of {!r}".format(self.instance_types)) - if self.name == XSD_ID: + if self.name == XSD_ID and kwargs.get('level'): try: id_map = kwargs['id_map'] except KeyError: pass else: - id_map[obj] += 1 + try: + id_map[obj] += 1 + except TypeError: + id_map[obj] = 1 + if id_map[obj] > 1: yield self.validation_error(validation, "Duplicated xsd:ID value {!r}".format(obj)) + elif self.name == XSD_IDREF: + try: + id_map = kwargs['id_map'] + except KeyError: + pass + else: + if obj not in id_map: + id_map[obj] = kwargs.get('node', 0) + if validation == 'skip': try: yield self.to_python(obj) From 07070ad714ccf56937e2681351c9e71fd05024ee Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 11 Sep 2019 18:43:13 +0200 Subject: [PATCH 09/36] Fix W3C instance tests - Add iter_substitutes() to Xsd11Element to fix upa.xsd/upa2.xsd tests with instances. Now for XsdElement the abstract substitutes are simply ignored by iter_substitutes(). 
--- tox.ini | 5 ++ xmlschema/tests/test_w3c_suite.py | 4 +- xmlschema/validators/assertions.py | 12 ++- xmlschema/validators/attributes.py | 21 +++-- xmlschema/validators/complex_types.py | 11 ++- xmlschema/validators/elements.py | 118 +++++++++++++++++--------- xmlschema/validators/globals_.py | 50 ++++++----- xmlschema/validators/groups.py | 13 ++- xmlschema/validators/schema.py | 40 ++++++--- xmlschema/validators/simple_types.py | 2 +- xmlschema/validators/wildcards.py | 13 ++- xmlschema/validators/xsdbase.py | 12 +-- xmlschema/xpath.py | 6 +- 13 files changed, 200 insertions(+), 107 deletions(-) diff --git a/tox.ini b/tox.ini index 29ed924..90b6ff7 100644 --- a/tox.ini +++ b/tox.ini @@ -22,6 +22,11 @@ deps = commands = python xmlschema/tests/test_all.py {posargs} whitelist_externals = make +[testenv:py38] +deps = + lxml==4.3.5 + elementpath~=1.2.0 + [testenv:package] commands = python xmlschema/tests/test_package.py diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index 2f3080a..703d500 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -197,7 +197,9 @@ def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 test_conf = {} for version in xsd_version.split(): - if version not in args.version: + if 'version' in elem.attrib and version not in elem.attrib['version']: + continue + elif version not in args.version: continue elif version == '1.1' and source_href in XSD11_SKIPPED_TESTS: continue diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index d7e3746..6ff2a9f 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -80,11 +80,15 @@ class XsdAssert(XsdComponent, ElementPathMixin): self.parse_error(err, elem=self.elem) self.token = self.parser.parse('true()') - def __call__(self, elem, value=None): + def __call__(self, elem, value=None, source=None, **kwargs): self.parser.variables['value'] = value 
- if not self.token.evaluate(XPathContext(root=elem)): - msg = "expression is not true with test path %r." - yield XMLSchemaValidationError(self, obj=elem, reason=msg % self.path) + root = elem if source is None else source.root + try: + if not self.token.evaluate(XPathContext(root=root, item=elem)): + msg = "expression is not true with test path %r." + yield XMLSchemaValidationError(self, obj=elem, reason=msg % self.path) + except ElementPathError as err: + yield XMLSchemaValidationError(self, obj=elem, reason=str(err)) # For implementing ElementPathMixin def __iter__(self): diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index 770fc78..5347cc1 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -88,6 +88,9 @@ class XsdAttribute(XsdComponent, ValidationMixin): if 'default' in attrib: self.default = attrib['default'] + if 'fixed' in attrib: + self.fixed = attrib['fixed'] + if self._parse_reference(): try: xsd_attribute = self.maps.lookup_attribute(self.name) @@ -104,9 +107,11 @@ class XsdAttribute(XsdComponent, ValidationMixin): self.default = xsd_attribute.default if xsd_attribute.fixed is not None: - self.fixed = xsd_attribute.fixed - if 'fixed' in attrib and attrib['fixed'] != self.fixed: - self.parse_error("referenced attribute has a different fixed value %r" % xsd_attribute.fixed) + if self.fixed is None: + self.fixed = xsd_attribute.fixed + elif xsd_attribute.fixed != self.fixed: + msg = "referenced attribute has a different fixed value %r" + self.parse_error(msg % xsd_attribute.fixed) for attribute in ('form', 'type'): if attribute in self.elem.attrib: @@ -117,9 +122,6 @@ class XsdAttribute(XsdComponent, ValidationMixin): self.parse_error("not allowed type definition for XSD attribute reference") return - if 'fixed' in attrib: - self.fixed = attrib['fixed'] - try: form = get_xsd_form_attribute(self.elem, 'form') except ValueError as err: @@ -390,6 +392,9 @@ class 
XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): except KeyError: self.parse_error("an attribute group declaration requires a 'name' attribute.") return + else: + if self.schema.default_attributes == self.name and self.xsd_version > '1.0': + self.schema.default_attributes = self attributes = ordered_dict_class() for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): @@ -601,6 +606,10 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): reason = "%r attribute not allowed for element." % name yield self.validation_error(validation, reason, attrs, **kwargs) continue + else: + if xsd_attribute.use == 'prohibited': + reason = "use of attribute %r is prohibited" % name + yield self.validation_error(validation, reason, attrs, **kwargs) for result in xsd_attribute.iter_decode(value, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index ce6a12a..129b61b 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -578,7 +578,7 @@ class XsdComplexType(XsdType, ValidationMixin): """ # XSD 1.1 assertions for assertion in self.assertions: - for error in assertion(elem): + for error in assertion(elem, **kwargs): yield self.validation_error(validation, error, **kwargs) for result in self.attributes.iter_decode(elem.attrib, validation, **kwargs): @@ -699,11 +699,14 @@ class Xsd11ComplexType(XsdComplexType): elif not self.attributes[name].inheritable: self.parse_error("attribute %r must be inheritable") - if self.elem.get('defaultAttributesApply') in {'false', '0'}: - self.default_attributes_apply = False + if 'defaultAttributesApply' in self.elem.attrib: + if self.elem.attrib['defaultAttributesApply'].strip() in {'false', '0'}: + self.default_attributes_apply = False # Add default attributes - if self.default_attributes_apply and isinstance(self.schema.default_attributes, 
XsdAttributeGroup): + if self.schema.default_attributes is None: + pass + elif self.default_attributes_apply: if self.redefine is None and any(k in self.attributes for k in self.schema.default_attributes): self.parse_error("at least a default attribute is already declared in the complex type") self.attributes.update( diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 301d6a7..d7cac5d 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -66,7 +66,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) """ type = None qualified = False - attributes = None _ADMITTED_TAGS = {XSD_ELEMENT} _abstract = False @@ -78,7 +77,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def __init__(self, elem, schema, parent): super(XsdElement, self).__init__(elem, schema, parent) - self.names = (self.qualified_name,) if self.qualified else (self.qualified_name, self.local_name) + if self.qualified or self.ref is not None or 'targetNamespace' in elem.attrib: + self.names = (self.qualified_name,) + else: + self.names = (self.qualified_name, self.local_name) if self.type is None: raise XMLSchemaAttributeError("undefined 'type' attribute for %r." % self) if self.qualified is None: @@ -93,12 +95,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def __setattr__(self, name, value): if name == "type": assert value is None or isinstance(value, XsdType), "Wrong value %r for attribute 'type'." 
% value - if hasattr(value, 'attributes'): - self.attributes = value.attributes - else: - self.attributes = self.schema.BUILDERS.attribute_group_class( - XSD_ATTRIBUTE_GROUP_ELEMENT, self.schema, self - ) + self.attributes = self.get_attributes(value) super(XsdElement, self).__setattr__(name, value) def __iter__(self): @@ -130,7 +127,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) self.type = xsd_element.type self.qualified = xsd_element.qualified - for attr_name in ('type', 'nillable', 'default', 'fixed', 'form', 'block', 'abstract', 'final', 'substitutionGroup'): if attr_name in attrib: @@ -388,6 +384,12 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def get_type(self, elem): return self.type + def get_attributes(self, xsd_type): + try: + return xsd_type.attributes + except AttributeError: + return self.schema.empty_attribute_group + def get_path(self, ancestor=None, reverse=False): """ Returns the XPath expression of the element. 
The path is relative to the schema instance @@ -427,9 +429,11 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def iter_substitutes(self): for xsd_element in self.maps.substitution_groups.get(self.name, ()): - yield xsd_element + if not xsd_element.abstract: + yield xsd_element for e in xsd_element.iter_substitutes(): - yield e + if not e.abstract: + yield e def data_value(self, elem): """Returns the decoded data value of the provided element as XPath fn:data().""" @@ -454,19 +458,29 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) converter = self.schema.get_converter(converter, level=level, **kwargs) value = content = attributes = None - # Get the instance type: xsi:type or the schema's declaration - if XSI_TYPE not in elem.attrib: - xsd_type = self.get_type(elem) - else: - xsi_type = elem.attrib[XSI_TYPE] + # Get the instance effective type + xsd_type = self.get_type(elem) + if XSI_TYPE in elem.attrib: + type_name = elem.attrib[XSI_TYPE] try: - xsd_type = self.maps.lookup_type(converter.unmap_qname(xsi_type)) - except KeyError: - yield self.validation_error(validation, "unknown type %r" % xsi_type, elem, **kwargs) - xsd_type = self.get_type(elem) + if hasattr(xsd_type, 'attributes') and XSI_TYPE in xsd_type.attributes: + xsd_type.attributes[XSI_TYPE].validate(type_name) + except XMLSchemaValidationError as err: + yield self.validation_error(validation, err, elem, **kwargs) + else: + try: + xsi_type = self.maps.lookup_type(converter.unmap_qname(type_name)) + except KeyError as err: + yield self.validation_error(validation, err, elem, **kwargs) + else: + if xsi_type.is_derived(xsd_type): + xsd_type = xsi_type + else: + reason = "%r is not a derived type of %r" % (xsd_type, self.type) + yield self.validation_error(validation, reason, elem, **kwargs) # Decode attributes - attribute_group = getattr(xsd_type, 'attributes', self.attributes) + attribute_group = self.get_attributes(xsd_type) for result 
in attribute_group.iter_decode(elem.attrib, validation, level=level, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) @@ -474,23 +488,28 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) attributes = result # Checks the xsi:nil attribute of the instance - if validation != 'skip' and XSI_NIL in elem.attrib: + if XSI_NIL in elem.attrib: + xsi_nil = elem.attrib[XSI_NIL].strip() if not self.nillable: yield self.validation_error(validation, "element is not nillable.", elem, **kwargs) - try: - if elem.attrib[XSI_NIL].strip() in ('true', '1'): - if elem.text is not None: - reason = "xsi:nil='true' but the element is not empty." - yield self.validation_error(validation, reason, elem, **kwargs) - else: - element_data = ElementData(elem.tag, None, None, attributes) - yield converter.element_decode(element_data, self, level) - return - except TypeError: + elif xsi_nil not in {'0', '1', 'false', 'true'}: reason = "xsi:nil attribute must has a boolean value." yield self.validation_error(validation, reason, elem, **kwargs) + elif xsi_nil in ('0', 'false'): + pass + elif elem.text is not None or len(elem): + reason = "xsi:nil='true' but the element is not empty." 
+ yield self.validation_error(validation, reason, elem, **kwargs) + else: + element_data = ElementData(elem.tag, None, None, attributes) + yield converter.element_decode(element_data, self, level) + return if not xsd_type.has_simple_content(): + for assertion in xsd_type.assertions: + for error in assertion(elem, **kwargs): + yield self.validation_error(validation, error, **kwargs) + for result in xsd_type.content_type.iter_decode( elem, validation, converter, level + 1, **kwargs): if isinstance(result, XMLSchemaValidationError): @@ -515,21 +534,28 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) elif not text and kwargs.get('use_defaults') and self.default is not None: text = self.default - if not xsd_type.is_simple(): + if xsd_type.is_complex(): + if text and xsd_type.content_type.is_list(): + value = text.split() + else: + value = text + for assertion in xsd_type.assertions: - for error in assertion(elem, value=text): + for error in assertion(elem, value=value, **kwargs): yield self.validation_error(validation, error, **kwargs) xsd_type = xsd_type.content_type if text is None: - for result in xsd_type.iter_decode('', validation, **kwargs): + for result in xsd_type.iter_decode('', validation, _skip_id=True, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) if 'filler' in kwargs: value = kwargs['filler'](self) else: - for result in xsd_type.iter_decode(text, validation, level=level, **kwargs): + if level == 0: + kwargs['_skip_id'] = True + for result in xsd_type.iter_decode(text, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) elif result is None and 'filler' in kwargs: @@ -594,20 +620,22 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) else: xsd_type = self.get_type(element_data) - attribute_group = getattr(xsd_type, 'attributes', 
self.attributes) + attribute_group = self.get_attributes(xsd_type) for result in attribute_group.iter_encode(element_data.attributes, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): errors.append(result) else: attributes = result - if validation != 'skip' and XSI_NIL in element_data.attributes: + if XSI_NIL in element_data.attributes: + xsi_nil = element_data.attributes[XSI_NIL].strip() if not self.nillable: errors.append("element is not nillable.") - xsi_nil = element_data.attributes[XSI_NIL] - if xsi_nil.strip() not in ('0', '1', 'true', 'false'): + elif xsi_nil not in {'0', '1', 'true', 'false'}: errors.append("xsi:nil attribute must has a boolean value.") - if element_data.text is not None: + elif xsi_nil in ('0', 'false'): + pass + elif element_data.text is not None or element_data.content: errors.append("xsi:nil='true' but the element is not empty.") else: elem = converter.etree_element(element_data.tag, attrib=attributes, level=level) @@ -865,6 +893,12 @@ class Xsd11Element(XsdElement): for obj in self.type.iter_components(xsd_classes): yield obj + def iter_substitutes(self): + for xsd_element in self.maps.substitution_groups.get(self.name, ()): + yield xsd_element + for e in xsd_element.iter_substitutes(): + yield e + def get_type(self, elem): if not self.alternatives: return self.type @@ -908,7 +942,7 @@ class Xsd11Element(XsdElement): if other.process_contents == 'skip': return True xsd_element = other.match(self.name, self.default_namespace, resolve=True) - return xsd_element is None or self.is_consistent(xsd_element, False) + return xsd_element is None or self.is_consistent(xsd_element, strict=False) if self.name == other.name: e = self diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py index 18f4866..1fabe9c 100644 --- a/xmlschema/validators/globals_.py +++ b/xmlschema/validators/globals_.py @@ -15,6 +15,7 @@ from __future__ import unicode_literals import warnings from collections import Counter 
+from ..compat import string_base_type from ..exceptions import XMLSchemaKeyError, XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaWarning from ..namespaces import XSD_NAMESPACE from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, XSD_SIMPLE_TYPE, \ @@ -119,7 +120,7 @@ def create_lookup_function(xsd_classes): else: types_desc = xsd_classes.__name__ - def lookup(global_map, qname, tag_map): + def lookup(qname, global_map, tag_map): try: obj = global_map[qname] except KeyError: @@ -236,22 +237,22 @@ class XsdGlobals(XsdValidator): __copy__ = copy def lookup_notation(self, qname): - return lookup_notation(self.notations, qname, self.validator.BUILDERS_MAP) + return lookup_notation(qname, self.notations, self.validator.BUILDERS_MAP) def lookup_type(self, qname): - return lookup_type(self.types, qname, self.validator.BUILDERS_MAP) + return lookup_type(qname, self.types, self.validator.BUILDERS_MAP) def lookup_attribute(self, qname): - return lookup_attribute(self.attributes, qname, self.validator.BUILDERS_MAP) + return lookup_attribute(qname, self.attributes, self.validator.BUILDERS_MAP) def lookup_attribute_group(self, qname): - return lookup_attribute_group(self.attribute_groups, qname, self.validator.BUILDERS_MAP) + return lookup_attribute_group(qname, self.attribute_groups, self.validator.BUILDERS_MAP) def lookup_group(self, qname): - return lookup_group(self.groups, qname, self.validator.BUILDERS_MAP) + return lookup_group(qname, self.groups, self.validator.BUILDERS_MAP) def lookup_element(self, qname): - return lookup_element(self.elements, qname, self.validator.BUILDERS_MAP) + return lookup_element(qname, self.elements, self.validator.BUILDERS_MAP) def lookup(self, tag, qname): """ @@ -314,6 +315,10 @@ class XsdGlobals(XsdValidator): def xsd_version(self): return self.validator.XSD_VERSION + @property + def builders_map(self): + return self.validator.BUILDERS_MAP + @property def all_errors(self): errors = [] @@ -455,8 +460,23 @@ class 
XsdGlobals(XsdValidator): self.lookup_notation(qname) for qname in self.attributes: self.lookup_attribute(qname) + for qname in self.attribute_groups: self.lookup_attribute_group(qname) + for schema in filter( + lambda x: isinstance(x.default_attributes, string_base_type), + not_built_schemas): + try: + schema.default_attributes = schema.maps.attribute_groups[schema.default_attributes] + except KeyError: + schema.default_attributes = None + msg = "defaultAttributes={!r} doesn't match an attribute group of {!r}" + schema.parse_error( + error=msg.format(schema.root.get('defaultAttributes'), schema), + elem=schema.root, + validation=schema.validation + ) + for qname in self.types: self.lookup_type(qname) for qname in self.elements: @@ -470,7 +490,7 @@ class XsdGlobals(XsdValidator): group.build() # Build XSD 1.1 identity references and assertions - if self.validator.XSD_VERSION != '1.0': + if self.xsd_version != '1.0': for schema in filter(lambda x: x.meta_schema is not None, not_built_schemas): for e in schema.iter_components(Xsd11Element): for constraint in filter(lambda x: x.ref is not None, e.identities.values()): @@ -513,21 +533,9 @@ class XsdGlobals(XsdValidator): if e is xsd_element: msg = "circularity found for substitution group with head element %r" e.parse_error(msg.format(e), validation=validation) - elif e.abstract and e.name not in self.substitution_groups and self.validator.XSD_VERSION > '1.0': + elif e.abstract and e.name not in self.substitution_groups and self.xsd_version > '1.0': self.parse_error("in XSD 1.1 an abstract element cannot be member of a substitution group") - if self.validator.XSD_VERSION > '1.0': - for s in filter(lambda x: x.default_attributes is not None, schemas): - if isinstance(s.default_attributes, XsdAttributeGroup): - continue - - try: - s.default_attributes = s.maps.attribute_groups[s.default_attributes] - except KeyError: - s.default_attributes = None - msg = "defaultAttributes={!r} doesn't match an attribute group of {!r}" - 
s.parse_error(msg.format(s.root.get('defaultAttributes'), s), s.root, validation) - if validation == 'strict' and not self.built: raise XMLSchemaNotBuiltError(self, "global map has unbuilt components: %r" % self.unbuilt) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 0fafa90..75d34a8 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -20,7 +20,7 @@ from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ XSD_CHOICE, XSD_ELEMENT, XSD_ANY from xmlschema.helpers import get_qname, local_name -from .exceptions import XMLSchemaValidationError, XMLSchemaChildrenValidationError +from .exceptions import XMLSchemaValidationError, XMLSchemaModelError, XMLSchemaChildrenValidationError from .xsdbase import ValidationMixin, XsdComponent, XsdType from .elements import XsdElement from .wildcards import XsdAnyElement, Xsd11AnyElement @@ -545,6 +545,17 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: continue break + elif isinstance(xsd_element, XsdAnyElement): + try: + matched_element = self.maps.lookup_element(child.tag) + except LookupError: + pass + else: + # EDC check of matched element + for e in filter(lambda x: isinstance(x, XsdElement), self.iter_elements()): + if not matched_element.is_consistent(e): + msg = "%r that matches %r is not consistent with local declaration %r" + raise XMLSchemaModelError(self, msg % (child, xsd_element, e)) for particle, occurs, expected in model.advance(True): errors.append((index, particle, occurs, expected)) diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 67e7b36..d46f5ac 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -319,6 +319,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.converter = self.get_converter(converter) self.xpath_proxy = XMLSchemaProxy(self) + self.empty_attribute_group = self.BUILDERS.attribute_group_class( + 
etree_element(XSD_ATTRIBUTE_GROUP), self, self + ) # Create or set the XSD global maps instance if self.meta_schema is None: @@ -634,16 +637,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): return group - def create_any_attribute_group(self, parent): - """ - Creates an attribute group related to schema instance that accepts any attribute. - - :param parent: the parent component to set for the any attribute group. - """ - attribute_group = self.BUILDERS.attribute_group_class(ATTRIBUTE_GROUP_ELEMENT, self, parent) - attribute_group[None] = self.BUILDERS.any_attribute_class(ANY_ATTRIBUTE_ELEMENT, self, attribute_group) - return attribute_group - def create_empty_content_group(self, parent, model='sequence'): if model == 'sequence': group_elem = etree_element(XSD_SEQUENCE) @@ -657,8 +650,33 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): group_elem.text = '\n ' return self.BUILDERS.group_class(group_elem, self, parent) + def create_any_attribute_group(self, parent): + """ + Creates an attribute group related to schema instance that accepts any attribute. + + :param parent: the parent component to set for the any attribute group. + """ + attribute_group = self.BUILDERS.attribute_group_class( + ATTRIBUTE_GROUP_ELEMENT, self, parent + ) + attribute_group[None] = self.BUILDERS.any_attribute_class( + ANY_ATTRIBUTE_ELEMENT, self, attribute_group + ) + return attribute_group + + def create_empty_attribute_group(self, parent): + """ + Creates an empty attribute group related to schema instance. + + :param parent: the parent component to set for the any attribute group. + """ + return self.BUILDERS.attribute_group_class(ATTRIBUTE_GROUP_ELEMENT, self, parent) + def copy(self): - """Makes a copy of the schema instance. The new instance has independent maps of shared XSD components.""" + """ + Makes a copy of the schema instance. The new instance has independent maps + of shared XSD components. 
+ """ schema = object.__new__(self.__class__) schema.__dict__.update(self.__dict__) schema.source = self.source.copy() diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 0180a4d..e0cbba6 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -517,7 +517,7 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.decode_error(validation, obj, self.to_python, reason="value is not an instance of {!r}".format(self.instance_types)) - if self.name == XSD_ID and kwargs.get('level'): + if self.name == XSD_ID and '_skip_id' not in kwargs: try: id_map = kwargs['id_map'] except KeyError: diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index abf7793..74cff18 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -323,6 +323,7 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): """ _ADMITTED_TAGS = {XSD_ANY} + precedences = () def __repr__(self): if self.namespace: @@ -409,6 +410,7 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): name, value = obj namespace = get_namespace(name) + if self.is_namespace_allowed(namespace): self._load_namespace(namespace) try: @@ -502,9 +504,8 @@ class XsdAnyAttribute(XsdWildcard): return name, value = attribute - namespace = get_namespace(name) - if self.is_namespace_allowed(namespace): - self._load_namespace(namespace) + if self.is_matching(name): + self._load_namespace(get_namespace(name)) try: xsd_attribute = self.maps.lookup_attribute(name) except LookupError: @@ -559,8 +560,6 @@ class Xsd11AnyElement(XsdAnyElement): Content: (annotation?) 
""" - precedences = () - def _parse(self): super(Xsd11AnyElement, self)._parse() self._parse_not_constraints() @@ -607,7 +606,7 @@ class Xsd11AnyElement(XsdAnyElement): if isinstance(other, XsdAnyElement) or self.process_contents == 'skip': return True xsd_element = self.match(other.name, other.default_namespace, resolve=True) - return xsd_element is None or other.is_consistent(xsd_element, False) + return xsd_element is None or other.is_consistent(xsd_element, strict=False) def add_precedence(self, other, group): if not self.precedences: @@ -636,7 +635,7 @@ class Xsd11AnyAttribute(XsdAnyAttribute): super(Xsd11AnyAttribute, self)._parse() self._parse_not_constraints() - def is_matching(self, name, default_namespace=None, group=None): + def is_matching(self, name, default_namespace=None, **kwargs): if name is None: return False elif not name or name[0] == '{': diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index 10dcb78..25726dd 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -395,12 +395,12 @@ class XsdComponent(XsdValidator): self.parse_error("a declaration contained in a global complexType " "must has the same namespace as its parent schema") - if not self._target_namespace and self.name[0] == '{': - self.name = local_name(self.name) - elif self.name[0] != '{': - self.name = '{%s}%s' % (self._target_namespace, self.name) - else: - self.name = '{%s}%s' % (self._target_namespace, local_name(self.name)) + if not self._target_namespace and self.name[0] == '{': + self.name = local_name(self.name) + elif self.name[0] != '{': + self.name = '{%s}%s' % (self._target_namespace, self.name) + else: + self.name = '{%s}%s' % (self._target_namespace, local_name(self.name)) @property def local_name(self): diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 5614981..e60d2fa 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -158,9 +158,9 @@ class ElementPathMixin(Sequence): :cvar text: The 
Element text. Its value is always `None`. For compatibility with the ElementTree API. :cvar tail: The Element tail. Its value is always `None`. For compatibility with the ElementTree API. """ - _attrib = {} text = None tail = None + attributes = {} namespaces = {} xpath_default_namespace = None xpath_proxy = None @@ -189,11 +189,11 @@ class ElementPathMixin(Sequence): @property def attrib(self): """Returns the Element attributes. For compatibility with the ElementTree API.""" - return getattr(self, 'attributes', self._attrib) + return self.attributes def get(self, key, default=None): """Gets an Element attribute. For compatibility with the ElementTree API.""" - return self.attrib.get(key, default) + return self.attributes.get(key, default) def iterfind(self, path, namespaces=None): """ From a4b1d8896b2990f8af1f8f20da07837862b3252c Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 13 Sep 2019 07:31:55 +0200 Subject: [PATCH 10/36] Add 'restriction' attribute to XsdGroup for simplify dynamic EDC checks --- xmlschema/tests/test_w3c_suite.py | 14 +++++++++++++- xmlschema/validators/complex_types.py | 3 +++ xmlschema/validators/exceptions.py | 9 +++++++-- xmlschema/validators/groups.py | 8 ++++++-- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index 703d500..7294758 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -98,6 +98,15 @@ SKIPPED_TESTS = { # Invalid XML tests '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078 '../msData/additional/test93490_8.xml', # 4799: Idem + + # Skip for missing XML version 1.1 implementation + '../saxonData/XmlVersions/xv001.v01.xml', # 14850 + '../saxonData/XmlVersions/xv003.v01.xml', # 14852 + '../saxonData/XmlVersions/xv005.v01.xml', # 14854 + '../saxonData/XmlVersions/xv006.v01.xml', # 14855: invalid character  (valid in XML 1.1) + 
'../saxonData/XmlVersions/xv006.n02.xml', # 14855: invalid character 𐀀 (valid in XML 1.1) + '../saxonData/XmlVersions/xv008.v01.xml', # 14857 + '../saxonData/XmlVersions/xv008.n01.xml', # 14857 } XSD11_SKIPPED_TESTS = { @@ -185,7 +194,10 @@ def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 return if source_href in SKIPPED_TESTS: if args.numbers: - print("Skip test number %d ..." % testgroup_num) + if source_href.endswith('.xsd'): + print("Skip test number %d ..." % testgroup_num) + else: + print("Skip file %r for test number %d ..." % (source_href, testgroup_num)) return # Normalize and check file path diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 129b61b..33b4e24 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -335,6 +335,8 @@ class XsdComplexType(XsdType, ValidationMixin): else: content_type = self.schema.create_empty_content_group(self, base_type.content_type.model) + content_type.restriction = base_type.content_type + if base_type.is_element_only() and content_type.mixed: self.parse_error( "derived a mixed content from a base type that has element-only content.", elem @@ -355,6 +357,7 @@ class XsdComplexType(XsdType, ValidationMixin): msg = "{!r} is not a restriction of the base type {!r}" self.parse_error(msg.format(self.open_content, base_type.open_content)) + content_type self.content_type = content_type self._parse_content_tail(elem, derivation='restriction', base_attributes=base_type.attributes) diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index 4cb009c..fdb1836 100644 --- a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -13,7 +13,7 @@ This module contains exception and warning classes for the 'xmlschema.validators """ from __future__ import unicode_literals -from ..compat import PY3 +from ..compat import PY3, string_base_type from ..exceptions import 
XMLSchemaException, XMLSchemaWarning, XMLSchemaValueError from ..etree import etree_tostring, is_etree_element, etree_getpath from ..helpers import qname_to_prefixed @@ -198,9 +198,14 @@ class XMLSchemaValidationError(XMLSchemaValidatorError, ValueError): :type namespaces: dict """ def __init__(self, validator, obj, reason=None, source=None, namespaces=None): + if not isinstance(obj, string_base_type): + _obj = obj + else: + _obj = obj.encode('ascii', 'xmlcharrefreplace').decode('utf-8') + super(XMLSchemaValidationError, self).__init__( validator=validator, - message="failed validating {!r} with {!r}".format(obj, validator), + message="failed validating {!r} with {!r}".format(_obj, validator), elem=obj if is_etree_element(obj) else None, source=source, namespaces=namespaces, diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 75d34a8..9a93394 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -77,6 +77,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): mixed = False model = None redefine = None + restriction = None interleave = None # an Xsd11AnyElement in case of XSD 1.1 openContent with mode='interleave' suffix = None # an Xsd11AnyElement in case of openContent with mode='suffix' or 'interleave' @@ -545,14 +546,17 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: continue break - elif isinstance(xsd_element, XsdAnyElement): + elif isinstance(xsd_element, XsdAnyElement) and xsd_element.process_contents != 'skip': try: matched_element = self.maps.lookup_element(child.tag) except LookupError: pass else: + # If it's a restriction the context is the base_type's group + group = self.restriction if self.restriction is not None else self + # EDC check of matched element - for e in filter(lambda x: isinstance(x, XsdElement), self.iter_elements()): + for e in filter(lambda x: isinstance(x, XsdElement), group.iter_elements()): if not matched_element.is_consistent(e): msg = "%r 
that matches %r is not consistent with local declaration %r" raise XMLSchemaModelError(self, msg % (child, xsd_element, e)) From 590c7e6c41dba10c736c252efe8f715ca992076c Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 17 Sep 2019 16:37:16 +0200 Subject: [PATCH 11/36] Add intersection() to XsdWildcard - Renamed XsdWildcard.extend() to union() --- xmlschema/converters.py | 1 + xmlschema/helpers.py | 39 +++++++++- xmlschema/namespaces.py | 2 +- xmlschema/tests/validators/test_wildcards.py | 6 +- xmlschema/validators/attributes.py | 9 ++- xmlschema/validators/complex_types.py | 20 +++-- xmlschema/validators/elements.py | 56 ++++++++------ xmlschema/validators/groups.py | 81 ++++++++++++++++---- xmlschema/validators/simple_types.py | 15 +++- xmlschema/validators/wildcards.py | 78 ++++++++++++++++--- xmlschema/validators/xsdbase.py | 4 + 11 files changed, 242 insertions(+), 69 deletions(-) diff --git a/xmlschema/converters.py b/xmlschema/converters.py index 5e0e90a..b317039 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -36,6 +36,7 @@ attributes. def raw_xml_encode(value): + """Encodes a simple value to XML.""" if isinstance(value, bool): return 'true' if value else 'false' elif isinstance(value, (list, tuple)): diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index b8bccba..4ecb649 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -68,12 +68,13 @@ def local_name(qname): def qname_to_prefixed(qname, namespaces): """ - Transforms a fully qualified name into a prefixed name using a namespace map. Returns the - *qname* argument if it's not a fully qualified name or if it has boolean value `False`. + Transforms a fully qualified name into a prefixed name using a namespace map. + Returns the *qname* argument if it's not a fully qualified name or if it has + boolean value `False`. - :param qname: a fully qualified name or a local name. + :param qname: an extended QName or a local name. 
:param namespaces: a map from prefixes to namespace URIs. - :return: string with a prefixed or local reference. + :return: a QName in prefixed format or a local name. """ if not qname: return qname @@ -90,6 +91,36 @@ def qname_to_prefixed(qname, namespaces): return qname +def qname_to_extended(qname, namespaces): + """ + Converts a QName in prefixed format or a local name to the extended QName format. + + :param qname: a QName in prefixed format or a local name. + :param namespaces: a map from prefixes to namespace URIs. + :return: a QName in extended format or a local name. + """ + try: + if qname[0] == '{' or not namespaces: + return qname + except IndexError: + return qname + + try: + prefix, name = qname.split(':', 1) + except ValueError: + if not namespaces.get(''): + return qname + else: + return '{%s}%s' % (namespaces[''], qname) + else: + try: + uri = namespaces[prefix] + except KeyError: + return qname + else: + return u'{%s}%s' % (uri, name) if uri else name + + def get_xsd_annotation(elem): """ Returns the annotation of an XSD component. diff --git a/xmlschema/namespaces.py b/xmlschema/namespaces.py index f1263ac..9be0986 100644 --- a/xmlschema/namespaces.py +++ b/xmlschema/namespaces.py @@ -82,7 +82,7 @@ class NamespaceResourcesMap(MutableMapping): class NamespaceMapper(MutableMapping): """ - A class to map/unmap namespace prefixes to URIs. + A class to map/unmap namespace prefixes to URIs. The :param namespaces: Initial data with namespace prefixes and URIs. 
""" diff --git a/xmlschema/tests/validators/test_wildcards.py b/xmlschema/tests/validators/test_wildcards.py index c5f80df..0364f9b 100644 --- a/xmlschema/tests/validators/test_wildcards.py +++ b/xmlschema/tests/validators/test_wildcards.py @@ -174,14 +174,14 @@ class TestXsd11Wildcards(TestXsdWildcards): any1, any2, any3, any4 = schema.groups['group1'][:] self.assertListEqual(any1.namespace, ['tns1']) - any1.extend(any2) + any1.union(any2) self.assertListEqual(any1.namespace, ['tns1', 'tns2']) self.assertListEqual(any3.namespace, []) self.assertListEqual(any3.not_namespace, ['tns1']) - any3.extend(any4) + any3.union(any4) self.assertListEqual(any3.not_namespace, ['tns1']) - any4.extend(any3) + any4.union(any3) self.assertListEqual(any4.not_namespace, ['tns1']) def test_open_content_mode_interleave(self): diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index 5347cc1..7256379 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -452,10 +452,13 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): else: if not isinstance(base_attributes, tuple): for name, attr in base_attributes.items(): - if name is not None and name in attributes: + if name not in attributes: + attributes[name] = attr + elif name is not None: self.parse_error("multiple declaration for attribute {!r}".format(name)) else: - attributes[name] = attr + attributes[name].intersection(attr) + elif self.xsd_version == '1.0': self.parse_error("Circular reference found between attribute groups " "{!r} and {!r}".format(self.name, attribute_group_qname)) @@ -479,7 +482,7 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): if name is None: if self.derivation == 'extension': try: - attr.extend(base_attr) + attr.union(base_attr) except ValueError as err: self.parse_error(err) elif not attr.is_restriction(base_attr): diff --git a/xmlschema/validators/complex_types.py 
b/xmlschema/validators/complex_types.py index 33b4e24..58d2601 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -10,12 +10,12 @@ # from __future__ import unicode_literals -from ..exceptions import XMLSchemaValueError +from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENCE, XSD_ALL, \ XSD_CHOICE, XSD_ANY_ATTRIBUTE, XSD_ATTRIBUTE, XSD_COMPLEX_CONTENT, XSD_RESTRICTION, \ XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, \ - XSD_OPEN_CONTENT, XSD_ASSERT -from ..helpers import get_qname, local_name, get_xsd_derivation_attribute + XSD_OPEN_CONTENT, XSD_ASSERT, XSI_TYPE +from ..helpers import get_qname, local_name, qname_to_extended, get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError from .xsdbase import XsdType, ValidationMixin @@ -357,7 +357,6 @@ class XsdComplexType(XsdType, ValidationMixin): msg = "{!r} is not a restriction of the base type {!r}" self.parse_error(msg.format(self.open_content, base_type.open_content)) - content_type self.content_type = content_type self._parse_content_tail(elem, derivation='restriction', base_attributes=base_type.attributes) @@ -539,6 +538,17 @@ class XsdComplexType(XsdType, ValidationMixin): def get_facet(*_args, **_kwargs): return None + def get_instance_type(self, attrs, namespaces): + if XSI_TYPE in self.attributes: + self.attributes[XSI_TYPE].validate(attrs[XSI_TYPE]) + + type_qname = qname_to_extended(attrs[XSI_TYPE], namespaces) + xsi_type = self.maps.lookup_type(type_qname) + if not xsi_type.is_derived(self): + raise XMLSchemaTypeError("%r is not a derived type of %r" % (xsi_type, self)) + + return xsi_type + def admit_simple_restriction(self): if 'restriction' in self.final: return False @@ -733,7 +743,7 @@ class Xsd11ComplexType(XsdComplexType): break self.open_content = XsdOpenContent(group_elem, 
self.schema, self) try: - self.open_content.any_element.extend(base_type.open_content.any_element) + self.open_content.any_element.union(base_type.open_content.any_element) except AttributeError: pass else: diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index d7cac5d..c8f8930 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -65,6 +65,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) """ type = None + alternatives = () qualified = False _ADMITTED_TAGS = {XSD_ELEMENT} @@ -461,23 +462,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) # Get the instance effective type xsd_type = self.get_type(elem) if XSI_TYPE in elem.attrib: - type_name = elem.attrib[XSI_TYPE] try: - if hasattr(xsd_type, 'attributes') and XSI_TYPE in xsd_type.attributes: - xsd_type.attributes[XSI_TYPE].validate(type_name) - except XMLSchemaValidationError as err: + xsd_type = xsd_type.get_instance_type(elem.attrib, converter) + except (KeyError, TypeError) as err: yield self.validation_error(validation, err, elem, **kwargs) - else: - try: - xsi_type = self.maps.lookup_type(converter.unmap_qname(type_name)) - except KeyError as err: - yield self.validation_error(validation, err, elem, **kwargs) - else: - if xsi_type.is_derived(xsd_type): - xsd_type = xsi_type - else: - reason = "%r is not a derived type of %r" % (xsd_type, self.type) - yield self.validation_error(validation, reason, elem, **kwargs) # Decode attributes attribute_group = self.get_attributes(xsd_type) @@ -610,15 +598,12 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) children = element_data.content attributes = () - if element_data.attributes and XSI_TYPE in element_data.attributes: - xsi_type = element_data.attributes[XSI_TYPE] + xsd_type = self.get_type(element_data) + if XSI_TYPE in element_data.attributes: try: - xsd_type = 
self.maps.lookup_type(converter.unmap_qname(xsi_type)) - except KeyError: - errors.append("unknown type %r" % xsi_type) - xsd_type = self.get_type(element_data) - else: - xsd_type = self.get_type(element_data) + xsd_type = xsd_type.get_instance_type(element_data.attributes, converter) + except (KeyError, TypeError) as err: + errors.append(err) attribute_group = self.get_attributes(xsd_type) for result in attribute_group.iter_encode(element_data.attributes, validation, **kwargs): @@ -801,6 +786,9 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) """ return self.name != other.name or self.type is other.type + def is_dynamic_consistent(self, other, xsd_type=None): + return self.name != other.name or xsd_type.is_dynamic_consistent(other.type) + class Xsd11Element(XsdElement): """ @@ -826,7 +814,6 @@ class Xsd11Element(XsdElement): Content: (annotation?, ((simpleType | complexType)?, alternative*, (unique | key | keyref)*)) """ - alternatives = () _target_namespace = None def _parse(self): @@ -964,6 +951,27 @@ class Xsd11Element(XsdElement): warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) return True + def is_dynamic_consistent(self, other, xsd_type=None): + if self.name == other.name: + e = self + else: + for e in self.iter_substitutes(): + if e.name == other.name: + break + else: + return True + + if xsd_type is None: + xsd_type = e.type + if len(e.alternatives) != len(other.alternatives): + return False + elif not xsd_type.is_dynamic_consistent(other.type): + return False + elif not all(any(a == x for x in other.alternatives) for a in e.alternatives) or \ + not all(any(a == x for x in e.alternatives) for a in other.alternatives): + msg = "Maybe a not equivalent type table between elements %r and %r." 
% (self, other) + warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) + return True class XsdAlternative(XsdComponent): """ diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 9a93394..e4d8af8 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -12,15 +12,17 @@ This module contains classes for XML Schema model groups. """ from __future__ import unicode_literals +import warnings from ..compat import unicode_type from ..exceptions import XMLSchemaValueError from ..etree import etree_element from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ - XSD_CHOICE, XSD_ELEMENT, XSD_ANY + XSD_CHOICE, XSD_ELEMENT, XSD_ANY, XSI_TYPE from xmlschema.helpers import get_qname, local_name -from .exceptions import XMLSchemaValidationError, XMLSchemaModelError, XMLSchemaChildrenValidationError +from .exceptions import XMLSchemaValidationError, XMLSchemaChildrenValidationError, \ + XMLSchemaTypeTableWarning from .xsdbase import ValidationMixin, XsdComponent, XsdType from .elements import XsdElement from .wildcards import XsdAnyElement, Xsd11AnyElement @@ -479,6 +481,62 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: return other_max_occurs >= max_occurs * self.max_occurs + def check_dynamic_context(self, elem, xsd_element, converter): + if isinstance(xsd_element, XsdAnyElement): + if xsd_element.process_contents == 'skip': + return + + try: + xsd_element = self.maps.lookup_element(elem.tag) + except LookupError: + alternatives = () + try: + xsd_type = self.any_type.get_instance_type(elem.attrib, converter) + except KeyError: + return + else: + alternatives = xsd_element.alternatives + try: + xsd_type = xsd_element.type.get_instance_type(elem.attrib, converter) + except KeyError: + xsd_type = xsd_element.type + + elif XSI_TYPE not in elem.attrib: + return + else: + alternatives = xsd_element.alternatives + try: + xsd_type = xsd_element.type.get_instance_type(elem.attrib, 
converter) + except KeyError: + xsd_type = xsd_element.type + + # If it's a restriction the context is the base_type's group + group = self.restriction if self.restriction is not None else self + + # Dynamic EDC check of matched element + for e in filter(lambda x: isinstance(x, XsdElement), group.iter_elements()): + if e.name == elem.tag: + pass + else: + for e in e.iter_substitutes(): + if e.name == elem.tag: + break + else: + continue + + if len(e.alternatives) != len(alternatives): + pass + elif not xsd_type.is_dynamic_consistent(e.type): + pass + elif not all(any(a == x for x in alternatives) for a in e.alternatives) or \ + not all(any(a == x for x in e.alternatives) for a in alternatives): + msg = "Maybe a not equivalent type table between elements %r and %r." % (self, xsd_element) + warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) + continue + + reason = "%r that matches %r is not consistent with local declaration %r" + raise XMLSchemaValidationError(self, reason % (elem, xsd_element, e)) + def iter_decode(self, elem, validation='lax', converter=None, level=0, **kwargs): """ Creates an iterator for decoding an Element content. 
@@ -546,20 +604,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: continue break - elif isinstance(xsd_element, XsdAnyElement) and xsd_element.process_contents != 'skip': - try: - matched_element = self.maps.lookup_element(child.tag) - except LookupError: - pass - else: - # If it's a restriction the context is the base_type's group - group = self.restriction if self.restriction is not None else self - # EDC check of matched element - for e in filter(lambda x: isinstance(x, XsdElement), group.iter_elements()): - if not matched_element.is_consistent(e): - msg = "%r that matches %r is not consistent with local declaration %r" - raise XMLSchemaModelError(self, msg % (child, xsd_element, e)) + try: + self.check_dynamic_context(child, xsd_element, converter) + except XMLSchemaValidationError as err: + yield self.validation_error(validation, err, elem, **kwargs) for particle, occurs, expected in model.advance(True): errors.append((index, particle, occurs, expected)) @@ -855,7 +904,7 @@ class Xsd11Group(XsdGroup): for w1 in filter(lambda x: isinstance(x, XsdAnyElement), base_items): for w2 in wildcards: if w1.process_contents == w2.process_contents and w1.occurs == w2.occurs: - w2.extend(w1) + w2.union(w1) w2.extended = True break else: diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index e0cbba6..08f2855 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -23,8 +23,8 @@ from ..qnames import XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, \ XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, \ XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, \ XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, \ - XSD_ERROR, XSD_ASSERT -from ..helpers import get_qname, local_name, get_xsd_derivation_attribute + XSD_ERROR, XSD_ASSERT, XSI_TYPE +from ..helpers import get_qname, local_name, 
qname_to_extended, get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, \ XMLSchemaDecodeError, XMLSchemaParseError @@ -387,6 +387,13 @@ class XsdSimpleType(XsdType, ValidationMixin): def get_facet(self, tag): return self.facets.get(tag) + def get_instance_type(self, attrs, namespaces): + type_qname = qname_to_extended(attrs[XSI_TYPE], namespaces) + xsi_type = self.maps.lookup_type(type_qname) + if not xsi_type.is_derived(self): + raise XMLSchemaValueError("%r is not a derived type of %r" % (xsi_type, self)) + return xsi_type + # # simpleType's derived classes: @@ -878,6 +885,10 @@ class XsdUnion(XsdSimpleType): def is_list(self): return all(mt.is_list() for mt in self.member_types) + def is_dynamic_consistent(self, other): + return other.is_derived(self) or hasattr(other, 'member_types') and \ + any(mt1.is_derived(mt2) for mt1 in other.member_types for mt2 in self.member_types) + def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 74cff18..59a26d1 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -240,14 +240,17 @@ class XsdWildcard(XsdComponent, ValidationMixin): else: return all(ns in other.namespace for ns in self.namespace) - def extend(self, other): + def union(self, other): """Extends the XSD wildcard to include the namespace of another XSD wildcard.""" + if self.not_qname: + self.not_qname = [qname for qname in self.not_qname if qname in other.not_qname] + if self.not_namespace: if other.not_namespace: self.not_namespace = [ns for ns in self.not_namespace if ns in other.not_namespace] - elif other.namespace == '##any': + elif '##any' in other.namespace: self.not_namespace = () - elif other.namespace != '##other': + elif '##other' not in other.namespace: self.not_namespace = [ns for ns in self.not_namespace if ns not in 
other.namespace] elif other.target_namespace in self.not_namespace: self.not_namespace = ['', other.target_namespace] if other.target_namespace else [''] @@ -259,9 +262,9 @@ class XsdWildcard(XsdComponent, ValidationMixin): return elif other.not_namespace: - if self.namespace == '##any': + if '##any' in self.namespace: return - elif self.namespace != '##other': + elif '##other' not in self.namespace: self.not_namespace = [ns for ns in other.not_namespace if ns not in self.namespace] elif self.target_namespace in other.not_namespace: self.not_namespace = ['', self.target_namespace] if self.target_namespace else [''] @@ -301,6 +304,59 @@ class XsdWildcard(XsdComponent, ValidationMixin): self.namespace = [] self.not_namespace = ['', w1.target_namespace] if w1.target_namespace else [''] + def intersection(self, other): + """Intersects the XSD wildcard with another XSD wildcard.""" + if self.not_qname: + self.not_qname.extend([qname for qname in other.not_qname if qname in self.not_qname]) + else: + self.not_qname = [qname for qname in other.not_qname] + + if self.not_namespace: + if other.not_namespace: + self.not_namespace.extend(ns for ns in other.not_namespace if ns not in self.not_namespace) + elif '##any' in other.namespace: + pass + elif '##other' not in other.namespace: + self.namespace = [ns for ns in other.namespace if ns not in self.not_namespace] + self.not_namespace = () + else: + if other.target_namespace not in self.not_namespace: + self.not_namespace.append(other.target_namespace) + if '' not in self.not_namespace: + self.not_namespace.append('') + return + + elif other.not_namespace: + if '##any' in self.namespace: + self.not_namespace = [ns for ns in other.not_namespace] + self.namespace = () + elif '##other' not in self.namespace: + self.namespace = [ns for ns in self.namespace if ns not in other.not_namespace] + else: + self.not_namespace = [ns for ns in other.not_namespace] + if self.target_namespace not in self.not_namespace: + 
self.not_namespace.append(self.target_namespace) + if '' not in self.not_namespace: + self.not_namespace.append('') + self.namespace = () + return + + if self.namespace == other.namespace: + return + elif '##any' in other.namespace: + return + elif '##any' in self.namespace: + self.namespace = other.namespace[:] + elif '##other' in self.namespace: + self.namespace = [ns for ns in other.namespace if ns not in ('', self.target_namespace)] + elif '##other' not in other.namespace: + self.namespace = [ns for ns in self.namespace if ns in other.namespace] + else: + if other.target_namespace in self.namespace: + self.namespace.remove(other.target_namespace) + if '' in self.namespace: + self.namespace.remove('') + def iter_decode(self, source, validation='lax', **kwargs): raise NotImplementedError @@ -382,11 +438,11 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): return iter(()) def iter_decode(self, elem, validation='lax', **kwargs): - if self.process_contents == 'skip': - return - namespace = get_namespace(elem.tag) if self.is_namespace_allowed(namespace): + if self.process_contents == 'skip': + return + self._load_namespace(namespace) try: xsd_element = self.maps.lookup_element(elem.tag) @@ -500,11 +556,11 @@ class XsdAnyAttribute(XsdWildcard): pass def iter_decode(self, attribute, validation='lax', **kwargs): - if self.process_contents == 'skip': - return - name, value = attribute if self.is_matching(name): + if self.process_contents == 'skip': + return + self._load_namespace(get_namespace(name)) try: xsd_attribute = self.maps.lookup_attribute(name) diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index 25726dd..55d8caf 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -633,6 +633,10 @@ class XsdType(XsdComponent): def is_derived(self, other, derivation=None): raise NotImplementedError + def is_dynamic_consistent(self, other): + return other.is_derived(self) or hasattr(other, 
'member_types') and \ + any(mt.is_derived(self) for mt in other.member_types) + def is_key(self): return self.name == XSD_ID or self.is_derived(self.maps.types[XSD_ID]) From 7b696fbabbda0bb9d3e2e6bc5caf4f1d9b792596 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 18 Sep 2019 09:41:01 +0200 Subject: [PATCH 12/36] Fix wildcard union and add tests for wildcard intersection --- xmlschema/tests/validators/test_wildcards.py | 122 +++++++++++++++++-- xmlschema/validators/wildcards.py | 46 ++++--- 2 files changed, 131 insertions(+), 37 deletions(-) diff --git a/xmlschema/tests/validators/test_wildcards.py b/xmlschema/tests/validators/test_wildcards.py index 0364f9b..b834028 100644 --- a/xmlschema/tests/validators/test_wildcards.py +++ b/xmlschema/tests/validators/test_wildcards.py @@ -158,31 +158,127 @@ class TestXsd11Wildcards(TestXsdWildcards): self.assertFalse(any2.is_restriction(any1)) self.assertTrue(any3.is_restriction(any1)) - def test_extend(self): + def test_wildcard_union(self): schema = self.schema_class(""" - - - - + + + + + + + """) - any1, any2, any3, any4 = schema.groups['group1'][:] - + # + any1, any2 = schema.groups['group1'][:2] self.assertListEqual(any1.namespace, ['tns1']) any1.union(any2) self.assertListEqual(any1.namespace, ['tns1', 'tns2']) - self.assertListEqual(any3.namespace, []) - self.assertListEqual(any3.not_namespace, ['tns1']) - any3.union(any4) - self.assertListEqual(any3.not_namespace, ['tns1']) - any4.union(any3) - self.assertListEqual(any4.not_namespace, ['tns1']) + # + any1, any2 = schema.groups['group1'][2:4] + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + any1.union(any2) + self.assertListEqual(any1.not_namespace, ['tns1']) + any2.union(any1) + self.assertListEqual(any2.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][4:6] + any1.union(any2) + self.assertEqual(any1.namespace, ('##any',)) + self.assertEqual(any1.not_namespace, ()) + + # + any1, any2 = 
schema.groups['group1'][6:8] + any1.union(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][8:10] + any1.union(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][10:12] + any1.union(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['', 'tns1']) + + # + any1, any2 = schema.groups['group1'][12:14] + any1.union(any2) + self.assertListEqual(any1.namespace, ['##any']) + self.assertListEqual(any1.not_namespace, []) + + def test_wildcard_intersection(self): + schema = self.schema_class(""" + + + + + + + + + + + + + """) + + # + any1, any2 = schema.groups['group1'][:2] + self.assertListEqual(any1.namespace, ['tns1']) + any1.intersection(any2) + self.assertListEqual(any1.namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][2:4] + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + any1.intersection(any2) + self.assertListEqual(any1.not_namespace, ['tns1', 'tns2']) + any2.intersection(any1) + self.assertListEqual(any2.not_namespace, ['tns1', 'tns2']) + + # + any1, any2 = schema.groups['group1'][4:6] + any1.intersection(any2) + self.assertEqual(any1.namespace, []) + self.assertEqual(any1.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][6:8] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1', '']) + + # + any1, any2 = schema.groups['group1'][8:10] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1', '']) + + # + any1, any2 = schema.groups['group1'][10:12] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['', 'tns1']) + + # + any1, any2 = schema.groups['group1'][12:14] + 
any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns2', 'tns1', '']) def test_open_content_mode_interleave(self): schema = self.check_schema(""" diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 59a26d1..5800101 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -241,7 +241,9 @@ class XsdWildcard(XsdComponent, ValidationMixin): return all(ns in other.namespace for ns in self.namespace) def union(self, other): - """Extends the XSD wildcard to include the namespace of another XSD wildcard.""" + """ + Update an XSD wildcard with the union of itself and another XSD wildcard. + """ if self.not_qname: self.not_qname = [qname for qname in self.not_qname if qname in other.not_qname] @@ -249,13 +251,14 @@ class XsdWildcard(XsdComponent, ValidationMixin): if other.not_namespace: self.not_namespace = [ns for ns in self.not_namespace if ns in other.not_namespace] elif '##any' in other.namespace: - self.not_namespace = () - elif '##other' not in other.namespace: - self.not_namespace = [ns for ns in self.not_namespace if ns not in other.namespace] - elif other.target_namespace in self.not_namespace: - self.not_namespace = ['', other.target_namespace] if other.target_namespace else [''] + self.not_namespace = [] + self.namespace = ['##any'] + return + elif '##other' in other.namespace: + not_namespace = ('', other.target_namespace) + self.not_namespace = [ns for ns in self.not_namespace if ns in not_namespace] else: - self.not_namespace = () + self.not_namespace = [ns for ns in self.not_namespace if ns not in other.namespace] if not self.not_namespace: self.namespace = ['##any'] @@ -264,15 +267,13 @@ class XsdWildcard(XsdComponent, ValidationMixin): elif other.not_namespace: if '##any' in self.namespace: return - elif '##other' not in self.namespace: - self.not_namespace = [ns for ns in other.not_namespace if ns not in self.namespace] - elif 
self.target_namespace in other.not_namespace: - self.not_namespace = ['', self.target_namespace] if self.target_namespace else [''] + elif '##other' in self.namespace: + not_namespace = ('', self.target_namespace) + self.not_namespace = [ns for ns in other.not_namespace if ns in not_namespace] else: - self.not_namespace = () + self.not_namespace = [ns for ns in other.not_namespace if ns not in self.namespace] - if not self.not_namespace: - self.namespace = ['##any'] + self.namespace = ['##any'] if not self.not_namespace else [] return if '##any' in self.namespace or self.namespace == other.namespace: @@ -288,12 +289,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): self.namespace.extend(ns for ns in other.namespace if ns not in self.namespace) return - if w2.not_namespace: - self.not_namespace = [ns for ns in w2.not_namespace] - if w1.target_namespace not in self.not_namespace: - self.not_namespace.append(w1.target_namespace) - self.namespace = [] - elif w1.target_namespace in w2.namespace and '' in w2.namespace: + if w1.target_namespace in w2.namespace and '' in w2.namespace: self.namespace = ['##any'] elif '' not in w2.namespace and w1.target_namespace == w2.target_namespace: self.namespace = ['##other'] @@ -305,7 +301,9 @@ class XsdWildcard(XsdComponent, ValidationMixin): self.not_namespace = ['', w1.target_namespace] if w1.target_namespace else [''] def intersection(self, other): - """Intersects the XSD wildcard with another XSD wildcard.""" + """ + Update an XSD wildcard with the intersection of itself and another XSD wildcard. 
+ """ if self.not_qname: self.not_qname.extend([qname for qname in other.not_qname if qname in self.not_qname]) else: @@ -318,7 +316,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): pass elif '##other' not in other.namespace: self.namespace = [ns for ns in other.namespace if ns not in self.not_namespace] - self.not_namespace = () + self.not_namespace = [] else: if other.target_namespace not in self.not_namespace: self.not_namespace.append(other.target_namespace) @@ -329,7 +327,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): elif other.not_namespace: if '##any' in self.namespace: self.not_namespace = [ns for ns in other.not_namespace] - self.namespace = () + self.namespace = [] elif '##other' not in self.namespace: self.namespace = [ns for ns in self.namespace if ns not in other.not_namespace] else: @@ -338,7 +336,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): self.not_namespace.append(self.target_namespace) if '' not in self.not_namespace: self.not_namespace.append('') - self.namespace = () + self.namespace = [] return if self.namespace == other.namespace: From e4d9941eb3e84c5e11438b2e754cfec8ca8af949 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 18 Sep 2019 10:52:48 +0200 Subject: [PATCH 13/36] Fix wildcard restriction checking in case of notQName --- xmlschema/validators/wildcards.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 5800101..1ff1b20 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -209,7 +209,8 @@ class XsdWildcard(XsdComponent, ValidationMixin): elif other.not_qname: if not self.deny_qnames(x for x in other.not_qname if not x.startswith('##')): return False - elif any(not other.is_namespace_allowed(get_namespace(x)) for x in self.not_qname if not x.startswith('##')): + elif any(not other.is_namespace_allowed(get_namespace(x)) + for x in self.not_qname if not 
x.startswith('##')): return False if self.not_namespace: @@ -244,8 +245,11 @@ class XsdWildcard(XsdComponent, ValidationMixin): """ Update an XSD wildcard with the union of itself and another XSD wildcard. """ - if self.not_qname: - self.not_qname = [qname for qname in self.not_qname if qname in other.not_qname] + if not self.not_qname: + self.not_qname = other.not_qname[:] + else: + self.not_qname = [x for x in self.not_qname if x in other.not_qname or + not other.is_namespace_allowed(get_namespace(x))] if self.not_namespace: if other.not_namespace: From 0480e4bee8b423e903b870916d5fbcccd18f08c1 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 18 Sep 2019 23:24:41 +0200 Subject: [PATCH 14/36] Add is_override() to XsdComponent for XSD 1.1 schemas - Change is_global() from property to function --- xmlschema/converters.py | 4 ++-- xmlschema/tests/test_meta.py | 8 +++---- xmlschema/tests/validators/test_wildcards.py | 9 ++++++++ xmlschema/validators/assertions.py | 2 +- xmlschema/validators/complex_types.py | 2 +- xmlschema/validators/elements.py | 24 -------------------- xmlschema/validators/groups.py | 22 ++++++++---------- xmlschema/validators/schema.py | 17 ++++++++------ xmlschema/validators/simple_types.py | 4 ++-- xmlschema/validators/wildcards.py | 18 +++++---------- xmlschema/validators/xsdbase.py | 18 ++++++++++----- xmlschema/xpath.py | 6 ++--- 12 files changed, 59 insertions(+), 75 deletions(-) diff --git a/xmlschema/converters.py b/xmlschema/converters.py index b317039..d7deaa3 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -261,7 +261,7 @@ class XMLSchemaConverter(NamespaceMapper): :return: a data structure containing the decoded data. 
""" result_dict = self.dict() - if level == 0 and xsd_element.is_global and not self.strip_namespaces and self: + if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: schema_namespaces = set(xsd_element.namespaces.values()) result_dict.update( ('%s:%s' % (self.ns_prefix, k) if k else self.ns_prefix, v) for k, v in self.items() @@ -899,7 +899,7 @@ class JsonMLConverter(XMLSchemaConverter): for name, value, _ in self.map_content(data.content) ]) - if level == 0 and xsd_element.is_global and not self.strip_namespaces and self: + if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: attributes.update([('xmlns:%s' % k if k else 'xmlns', v) for k, v in self.items()]) if attributes: result_list.insert(1, attributes) diff --git a/xmlschema/tests/test_meta.py b/xmlschema/tests/test_meta.py index 2117923..1d047e5 100644 --- a/xmlschema/tests/test_meta.py +++ b/xmlschema/tests/test_meta.py @@ -286,7 +286,7 @@ class TestGlobalMaps(unittest.TestCase): self.assertEqual(len(XMLSchema10.meta_schema.maps.attribute_groups), 9) self.assertEqual(len(XMLSchema10.meta_schema.maps.groups), 18) self.assertEqual(len(XMLSchema10.meta_schema.maps.elements), 45) - self.assertEqual(len([e.is_global for e in XMLSchema10.meta_schema.maps.iter_globals()]), 200) + self.assertEqual(len([e.is_global() for e in XMLSchema10.meta_schema.maps.iter_globals()]), 200) self.assertEqual(len(XMLSchema10.meta_schema.maps.substitution_groups), 0) def test_xsd_11_globals(self): @@ -296,7 +296,7 @@ class TestGlobalMaps(unittest.TestCase): self.assertEqual(len(XMLSchema11.meta_schema.maps.attribute_groups), 10) self.assertEqual(len(XMLSchema11.meta_schema.maps.groups), 19) self.assertEqual(len(XMLSchema11.meta_schema.maps.elements), 51) - self.assertEqual(len([e.is_global for e in XMLSchema11.meta_schema.maps.iter_globals()]), 225) + self.assertEqual(len([e.is_global() for e in XMLSchema11.meta_schema.maps.iter_globals()]), 225) 
self.assertEqual(len(XMLSchema11.meta_schema.maps.substitution_groups), 1) def test_xsd_10_build(self): @@ -319,7 +319,7 @@ class TestGlobalMaps(unittest.TestCase): for g in XMLSchema10.meta_schema.maps.iter_globals(): for c in g.iter_components(): total_counter += 1 - if c.is_global: + if c.is_global(): global_counter += 1 self.assertEqual(global_counter, 200) self.assertEqual(total_counter, 901) @@ -330,7 +330,7 @@ class TestGlobalMaps(unittest.TestCase): for g in XMLSchema11.meta_schema.maps.iter_globals(): for c in g.iter_components(): total_counter += 1 - if c.is_global: + if c.is_global(): global_counter += 1 self.assertEqual(global_counter, 225) self.assertEqual(total_counter, 1051) diff --git a/xmlschema/tests/validators/test_wildcards.py b/xmlschema/tests/validators/test_wildcards.py index b834028..4e77651 100644 --- a/xmlschema/tests/validators/test_wildcards.py +++ b/xmlschema/tests/validators/test_wildcards.py @@ -231,6 +231,8 @@ class TestXsd11Wildcards(TestXsdWildcards): + + """) @@ -280,6 +282,13 @@ class TestXsd11Wildcards(TestXsdWildcards): self.assertListEqual(any1.namespace, []) self.assertListEqual(any1.not_namespace, ['tns2', 'tns1', '']) + # + # + any1, any2 = schema.groups['group1'][14:16] + any1.intersection(any2) + self.assertListEqual(any1.namespace, ['']) + self.assertListEqual(any1.not_qname, ['##defined', 'qn1']) + def test_open_content_mode_interleave(self): schema = self.check_schema(""" diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 6ff2a9f..9cdb581 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -70,7 +70,7 @@ class XsdAssert(XsdComponent, ElementPathMixin): @property def built(self): - return self.token is not None and (self.base_type.is_global or self.base_type.built) + return self.token is not None and (self.base_type.parent is None or self.base_type.built) def parse_xpath_test(self): self.parser.schema = XMLSchemaProxy(self.schema, self) diff 
--git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 58d2601..27ac766 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -719,7 +719,7 @@ class Xsd11ComplexType(XsdComplexType): # Add default attributes if self.schema.default_attributes is None: pass - elif self.default_attributes_apply: + elif self.default_attributes_apply and not self.is_override(): if self.redefine is None and any(k in self.attributes for k in self.schema.default_attributes): self.parse_error("at least a default attribute is already declared in the complex type") self.attributes.update( diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index c8f8930..894f4ac 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -786,9 +786,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) """ return self.name != other.name or self.type is other.type - def is_dynamic_consistent(self, other, xsd_type=None): - return self.name != other.name or xsd_type.is_dynamic_consistent(other.type) - class Xsd11Element(XsdElement): """ @@ -951,27 +948,6 @@ class Xsd11Element(XsdElement): warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) return True - def is_dynamic_consistent(self, other, xsd_type=None): - if self.name == other.name: - e = self - else: - for e in self.iter_substitutes(): - if e.name == other.name: - break - else: - return True - - if xsd_type is None: - xsd_type = e.type - if len(e.alternatives) != len(other.alternatives): - return False - elif not xsd_type.is_dynamic_consistent(other.type): - return False - elif not all(any(a == x for x in other.alternatives) for a in e.alternatives) or \ - not all(any(a == x for x in e.alternatives) for a in other.alternatives): - msg = "Maybe a not equivalent type table between elements %r and %r." 
% (self, other) - warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) - return True class XsdAlternative(XsdComponent): """ diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index e4d8af8..c3bfb69 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -516,26 +516,22 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): # Dynamic EDC check of matched element for e in filter(lambda x: isinstance(x, XsdElement), group.iter_elements()): if e.name == elem.tag: - pass + other = e else: - for e in e.iter_substitutes(): - if e.name == elem.tag: + for other in e.iter_substitutes(): + if other.name == elem.tag: break else: continue - if len(e.alternatives) != len(alternatives): - pass - elif not xsd_type.is_dynamic_consistent(e.type): - pass - elif not all(any(a == x for x in alternatives) for a in e.alternatives) or \ - not all(any(a == x for x in e.alternatives) for a in alternatives): + if len(other.alternatives) != len(alternatives) or \ + not xsd_type.is_dynamic_consistent(other.type): + reason = "%r that matches %r is not consistent with local declaration %r" + raise XMLSchemaValidationError(self, reason % (elem, xsd_element, other)) + elif not all(any(a == x for x in alternatives) for a in other.alternatives) or \ + not all(any(a == x for x in other.alternatives) for a in alternatives): msg = "Maybe a not equivalent type table between elements %r and %r." 
% (self, xsd_element) warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) - continue - - reason = "%r that matches %r is not consistent with local declaration %r" - raise XMLSchemaValidationError(self, reason % (elem, xsd_element, e)) def iter_decode(self, elem, validation='lax', converter=None, level=0, **kwargs): """ diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index d46f5ac..e79825d 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -43,6 +43,7 @@ from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, XMLSchema from .xsdbase import XSD_VALIDATION_MODES, XsdValidator, ValidationMixin, XsdComponent from .notations import XsdNotation from .identities import XsdKey, XsdKeyref, XsdUnique, Xsd11Key, Xsd11Unique, Xsd11Keyref +from .facets import XSD_11_FACETS from .simple_types import xsd_simple_type_factory, XsdUnion, XsdAtomicRestriction, \ Xsd11AtomicRestriction, Xsd11Union from .attributes import XsdAttribute, XsdAttributeGroup, Xsd11Attribute @@ -554,7 +555,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for e in xsd_element.iter(): if e is xsd_element or isinstance(e, XsdAnyElement): continue - elif e.ref or e.is_global: + elif e.ref or e.parent is None: if e.name in names: names.discard(e.name) if not names: @@ -1044,8 +1045,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if VC_FACET_AVAILABLE in elem.attrib: for qname in elem.attrib[VC_FACET_AVAILABLE].split(): try: - if self.resolve_qname(qname) in self.maps.types: - pass + if self.resolve_qname(qname) not in XSD_11_FACETS: + return False except XMLSchemaNamespaceError: pass except (KeyError, ValueError) as err: @@ -1054,12 +1055,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if VC_FACET_UNAVAILABLE in elem.attrib: for qname in elem.attrib[VC_FACET_UNAVAILABLE].split(): try: - if self.resolve_qname(qname) in self.maps.types: - pass + if 
self.resolve_qname(qname) not in XSD_11_FACETS: + break except XMLSchemaNamespaceError: - pass + break except (KeyError, ValueError) as err: - self.parse_error(str(err), elem) + self.parse_error(err, elem) + else: + return False return True diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 08f2855..8255466 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -739,7 +739,7 @@ class XsdList(XsdSimpleType): def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self - if not self.base_type.is_global: + if self.base_type.parent is not None: for obj in self.base_type.iter_components(xsd_classes): yield obj @@ -1154,7 +1154,7 @@ class XsdAtomicRestriction(XsdAtomic): def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self - if not self.base_type.is_global: + if self.base_type.parent is not None: for obj in self.base_type.iter_components(xsd_classes): yield obj diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 1ff1b20..75049f3 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -309,9 +309,9 @@ class XsdWildcard(XsdComponent, ValidationMixin): Update an XSD wildcard with the intersection of itself and another XSD wildcard. 
""" if self.not_qname: - self.not_qname.extend([qname for qname in other.not_qname if qname in self.not_qname]) + self.not_qname.extend(x for x in other.not_qname if x not in self.not_qname) else: - self.not_qname = [qname for qname in other.not_qname] + self.not_qname = [x for x in other.not_qname] if self.not_namespace: if other.not_namespace: @@ -398,9 +398,6 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): self._parse_particle(self.elem) self.xpath_proxy = XMLSchemaProxy(self.schema, self) - def is_emptiable(self): - return self.min_occurs == 0 or self.process_contents != 'strict' - def match(self, name, default_namespace=None, resolve=False, **kwargs): """ Returns the element wildcard if name is matching the name provided @@ -440,12 +437,11 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): return iter(()) def iter_decode(self, elem, validation='lax', **kwargs): - namespace = get_namespace(elem.tag) - if self.is_namespace_allowed(namespace): + if self.is_matching(elem.tag): if self.process_contents == 'skip': return - self._load_namespace(namespace) + self._load_namespace(get_namespace(elem.tag)) try: xsd_element = self.maps.lookup_element(elem.tag) except LookupError: @@ -652,8 +648,7 @@ class Xsd11AnyElement(XsdAnyElement): return False if '##defined' in self.not_qname and name in self.maps.elements: - if self.maps.elements[name].schema is self.schema: - return False + return False if group and '##definedSibling' in self.not_qname: if any(e.is_matching(name) for e in group.iter_elements() if not isinstance(e, XsdAnyElement)): @@ -705,8 +700,7 @@ class Xsd11AnyAttribute(XsdAnyAttribute): namespace = default_namespace if '##defined' in self.not_qname and name in self.maps.attributes: - if self.maps.attributes[name].schema is self.schema: - return False + return False return name not in self.not_qname and self.is_namespace_allowed(namespace) diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py 
index 55d8caf..fe623f9 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -17,10 +17,11 @@ import re from ..compat import PY3, string_base_type, unicode_type from ..exceptions import XMLSchemaValueError, XMLSchemaTypeError from ..qnames import XSD_ANNOTATION, XSD_APPINFO, XSD_DOCUMENTATION, XML_LANG, \ - XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ID + XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ID, XSD_OVERRIDE from ..helpers import get_qname, local_name, qname_to_prefixed from ..etree import etree_tostring, is_etree_element -from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, XMLSchemaDecodeError, XMLSchemaEncodeError +from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, \ + XMLSchemaDecodeError, XMLSchemaEncodeError XSD_VALIDATION_MODES = {'strict', 'lax', 'skip'} @@ -252,11 +253,16 @@ class XsdComponent(XsdValidator): def xsd_version(self): return self.schema.XSD_VERSION - @property def is_global(self): - """Is `True` if the instance is a global component, `False` if it's local.""" + """Returns `True` if the instance is a global component, `False` if it's local.""" return self.parent is None + def is_override(self): + """Returns `True` if the instance is an override of a global component.""" + if self.parent is not None: + return False + return any(self.elem in x for x in self.schema.root if x.tag == XSD_OVERRIDE) + @property def schema_elem(self): """The reference element of the schema for the component instance.""" @@ -634,8 +640,8 @@ class XsdType(XsdComponent): raise NotImplementedError def is_dynamic_consistent(self, other): - return other.is_derived(self) or hasattr(other, 'member_types') and \ - any(mt.is_derived(self) for mt in other.member_types) + return self.is_derived(other) or hasattr(other, 'member_types') and \ + any(self.is_derived(mt) for mt in other.member_types) def is_key(self): return self.name == XSD_ID or 
self.is_derived(self.maps.types[XSD_ID]) diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index e60d2fa..ef40fe6 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -36,7 +36,7 @@ class XMLSchemaContext(XPathSchemaContext): if len(elem): context.size = len(elem) for context.position, context.item in enumerate(elem): - if context.item.is_global: + if context.item.parent is None: for item in safe_iter_descendants(context): yield item elif getattr(context.item, 'ref', None) is not None: @@ -64,7 +64,7 @@ class XMLSchemaContext(XPathSchemaContext): if len(elem): context.size = len(elem) for context.position, context.item in enumerate(elem): - if context.item.is_global: + if context.item.parent is None: for item in safe_iter_context(context): yield item elif getattr(context.item, 'ref', None) is not None: @@ -267,7 +267,7 @@ class ElementPathMixin(Sequence): if tag is None or elem.is_matching(tag): yield elem for child in elem: - if child.is_global: + if child.parent is None: for e in safe_iter(child): yield e elif getattr(child, 'ref', None) is not None: From a95dfe26fe696d9527f4149003e38815fe2f1948 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 19 Sep 2019 19:00:05 +0200 Subject: [PATCH 15/36] Add is_list() to XsdSimpleType and fix xs:QName length facets --- xmlschema/tests/validators/test_wildcards.py | 16 +++++++++++++++ xmlschema/validators/attributes.py | 21 ++++++++++++++------ xmlschema/validators/complex_types.py | 13 ++++++++---- xmlschema/validators/facets.py | 14 +++++++------ xmlschema/validators/simple_types.py | 8 ++++---- xmlschema/validators/wildcards.py | 18 +++++++++-------- 6 files changed, 62 insertions(+), 28 deletions(-) diff --git a/xmlschema/tests/validators/test_wildcards.py b/xmlschema/tests/validators/test_wildcards.py index 4e77651..3af1516 100644 --- a/xmlschema/tests/validators/test_wildcards.py +++ b/xmlschema/tests/validators/test_wildcards.py @@ -110,6 +110,22 @@ class TestXsdWildcards(XsdValidatorTestCase): """) 
self.assertEqual(schema.types['taggedType'].attributes[None].namespace, ['']) + def test_namespace_variants(self): + schema = self.schema_class(""" + + + + + + + + """) + + any1 = schema.groups['group1'][0] + self.assertEqual(any1.namespace, ['urn:a']) + any2 = schema.groups['group1'][1] + self.assertEqual(any2.namespace, []) + class TestXsd11Wildcards(TestXsdWildcards): diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index 7256379..43665f6 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -381,7 +381,7 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): def _parse(self): super(XsdAttributeGroup, self)._parse() elem = self.elem - any_attribute = False + any_attribute = None attribute_group_refs = [] if elem.tag == XSD_ATTRIBUTE_GROUP: @@ -398,15 +398,19 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): attributes = ordered_dict_class() for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): - if any_attribute: + if any_attribute is not None: if child.tag == XSD_ANY_ATTRIBUTE: self.parse_error("more anyAttribute declarations in the same attribute group") else: self.parse_error("another declaration after anyAttribute") elif child.tag == XSD_ANY_ATTRIBUTE: - any_attribute = True - attributes[None] = self.schema.BUILDERS.any_attribute_class(child, self.schema, self) + any_attribute = self.schema.BUILDERS.any_attribute_class(child, self.schema, self) + if None in attributes: + attributes[None] = attributes[None].copy() + attributes[None].intersection(any_attribute) + else: + attributes[None] = any_attribute elif child.tag == XSD_ATTRIBUTE: attribute = self.schema.BUILDERS.attribute_class(child, self.schema, self) @@ -457,7 +461,8 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): elif name is not None: self.parse_error("multiple declaration for attribute {!r}".format(name)) else: - 
attributes[name].intersection(attr) + attributes[None] = attributes[None].copy() + attributes[None].intersection(attr) elif self.xsd_version == '1.0': self.parse_error("Circular reference found between attribute groups " @@ -498,7 +503,11 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): attr.type.normalize(attr.fixed) != base_attr.type.normalize(base_attr.fixed): self.parse_error("Attribute %r: derived attribute has a different fixed value" % name) - self._attribute_group.update(self.base_attributes.items()) + if self.redefine is not None: + pass # In case of redefinition do not copy base attributes + else: + self._attribute_group.update(self.base_attributes.items()) + elif self.redefine is not None and not attribute_group_refs: for name, attr in self._attribute_group.items(): if name is None: diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 27ac766..51d9bea 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -79,7 +79,7 @@ class XsdComplexType(XsdType, ValidationMixin): def __repr__(self): if self.name is not None: return '%s(name=%r)' % (self.__class__.__name__, self.prefixed_name) - elif not hasattr(self, 'content_type'): + elif not hasattr(self, 'content_type') or not hasattr(self, 'attributes'): return '%s(id=%r)' % (self.__class__.__name__, id(self)) else: return '%s(content=%r, attributes=%r)' % ( @@ -717,13 +717,18 @@ class Xsd11ComplexType(XsdComplexType): self.default_attributes_apply = False # Add default attributes - if self.schema.default_attributes is None: + if self.redefine is None: + default_attributes = self.schema.default_attributes + else: + default_attributes = self.redefine.schema.default_attributes + + if default_attributes is None: pass elif self.default_attributes_apply and not self.is_override(): - if self.redefine is None and any(k in self.attributes for k in self.schema.default_attributes): + if self.redefine is None 
and any(k in self.attributes for k in default_attributes): self.parse_error("at least a default attribute is already declared in the complex type") self.attributes.update( - (k, v) for k, v in self.schema.default_attributes.items() if k not in self.attributes + (k, v) for k, v in default_attributes.items() if k not in self.attributes ) def _parse_complex_content_extension(self, elem, base_type): diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index 3595941..2a3bd7a 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -16,10 +16,10 @@ import re from elementpath import XPath2Parser, ElementPathError, datatypes from ..compat import unicode_type, MutableSequence -from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, XSD_WHITE_SPACE, \ - XSD_PATTERN, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, \ - XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_ASSERTION, XSD_EXPLICIT_TIMEZONE, XSD_NOTATION_TYPE, \ - XSD_BASE64_BINARY, XSD_HEX_BINARY +from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ + XSD_WHITE_SPACE, XSD_PATTERN, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, XSD_MIN_INCLUSIVE, \ + XSD_MIN_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_ASSERTION, \ + XSD_EXPLICIT_TIMEZONE, XSD_NOTATION_TYPE, XSD_BASE64_BINARY, XSD_HEX_BINARY, XSD_QNAME from ..regex import get_python_regex from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError @@ -150,6 +150,8 @@ class XsdLengthFacet(XsdFacet): self.validator = self.hex_length_validator elif primitive_type.name == XSD_BASE64_BINARY: self.validator = self.base64_length_validator + elif primitive_type.name == XSD_QNAME: + pass # See: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4009 else: self.validator = self.length_validator @@ -193,7 +195,7 @@ class XsdMinLengthFacet(XsdFacet): self.validator = self.hex_min_length_validator elif primitive_type.name == XSD_BASE64_BINARY: 
self.validator = self.base64_min_length_validator - else: + elif primitive_type.name != XSD_QNAME: self.validator = self.min_length_validator def min_length_validator(self, x): @@ -236,7 +238,7 @@ class XsdMaxLengthFacet(XsdFacet): self.validator = self.hex_max_length_validator elif primitive_type.name == XSD_BASE64_BINARY: self.validator = self.base64_max_length_validator - else: + elif primitive_type.name != XSD_QNAME: self.validator = self.max_length_validator def max_length_validator(self, x): diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 8255466..b2d3da2 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -293,6 +293,10 @@ class XsdSimpleType(XsdType, ValidationMixin): def is_complex(): return False + @staticmethod + def is_list(): + return False + def is_empty(self): return self.max_length == 0 @@ -463,10 +467,6 @@ class XsdAtomic(XsdSimpleType): def is_atomic(): return True - @staticmethod - def is_list(): - return False - class XsdAtomicBuiltin(XsdAtomic): """ diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 75049f3..981bd2d 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -36,22 +36,24 @@ class XsdWildcard(XsdComponent, ValidationMixin): super(XsdWildcard, self).__init__(elem, schema, parent) def __repr__(self): - if self.namespace: - return '%s(namespace=%r, process_contents=%r)' % ( - self.__class__.__name__, self.namespace, self.process_contents - ) - else: + if self.not_namespace: return '%s(not_namespace=%r, process_contents=%r)' % ( self.__class__.__name__, self.not_namespace, self.process_contents ) + else: + return '%s(namespace=%r, process_contents=%r)' % ( + self.__class__.__name__, self.namespace, self.process_contents + ) def _parse(self): super(XsdWildcard, self)._parse() # Parse namespace and processContents namespace = self.elem.get('namespace', '##any').strip() - if 
namespace == '##any' or namespace == '': + if namespace == '##any': pass + elif not namespace: + self.namespace = [] # an empty value means no namespace allowed! elif namespace == '##other': self.namespace = [namespace] elif namespace == '##local': @@ -163,9 +165,9 @@ class XsdWildcard(XsdComponent, ValidationMixin): def is_namespace_allowed(self, namespace): if self.not_namespace: return namespace not in self.not_namespace - elif self.namespace[0] == '##any' or namespace == XSI_NAMESPACE: + elif '##any' in self.namespace or namespace == XSI_NAMESPACE: return True - elif self.namespace[0] == '##other': + elif '##other' in self.namespace: return namespace and namespace != self.target_namespace else: return namespace in self.namespace From 23390a1ed7f18366bf3cdf1680525be2c680d891 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 20 Sep 2019 11:49:01 +0200 Subject: [PATCH 16/36] Fix xs:keyref ref building and add count_digits() helper --- xmlschema/helpers.py | 35 ++++++++++++++++++++++++++++++ xmlschema/tests/test_helpers.py | 31 +++++++++++++++++++++++++- xmlschema/validators/facets.py | 14 ++++++++---- xmlschema/validators/globals_.py | 8 +++---- xmlschema/validators/identities.py | 2 +- 5 files changed, 80 insertions(+), 10 deletions(-) diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index 4ecb649..a2c75fe 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -12,7 +12,9 @@ This module contains various helper functions and classes. """ import re +from decimal import Decimal +from .compat import string_base_type from .exceptions import XMLSchemaValueError, XMLSchemaTypeError from .qnames import XSD_ANNOTATION @@ -178,6 +180,39 @@ def get_xsd_form_attribute(elem, attribute): return value +def count_digits(number): + """ + Counts the digits of a number. + + :param number: an int or a float or a Decimal or a string representing a number. 
+ :return: a couple with the number of digits of the integer part and \ + the number of digits of the decimal part. + """ + if isinstance(number, string_base_type): + number = str(Decimal(number)).lstrip('-+') + else: + number = str(number).lstrip('-+') + + if 'E' in number: + significand, _, exponent = number.partition('E') + elif 'e' in number: + significand, _, exponent = number.partition('e') + elif '.' not in number: + return len(number.lstrip('0')), 0 + else: + integer_part, _, decimal_part = number.partition('.') + return len(integer_part.lstrip('0')), len(decimal_part.rstrip('0')) + + significand = significand.strip('0') + exponent = int(exponent) + + num_digits = len(significand) - 1 if '.' in significand else len(significand) + if exponent > 0: + return num_digits + exponent, 0 + else: + return 0, num_digits - exponent - 1 + + class ParticleCounter(object): """ An helper class for counting total min/max occurrences of XSD particles. diff --git a/xmlschema/tests/test_helpers.py b/xmlschema/tests/test_helpers.py index 7109a9e..1e96cb1 100644 --- a/xmlschema/tests/test_helpers.py +++ b/xmlschema/tests/test_helpers.py @@ -15,13 +15,14 @@ This module runs tests on various internal helper functions. 
from __future__ import unicode_literals import unittest +import decimal import xml.etree.ElementTree as ElementTree from xmlschema import XMLSchema, XMLSchemaParseError from xmlschema.etree import etree_element, prune_etree from xmlschema.namespaces import XSD_NAMESPACE, XSI_NAMESPACE from xmlschema.helpers import get_xsd_annotation, get_namespace, get_qname, local_name, \ - qname_to_prefixed, get_xsd_derivation_attribute + qname_to_prefixed, get_xsd_derivation_attribute, count_digits from xmlschema.qnames import XSI_TYPE, XSD_SCHEMA, XSD_ELEMENT, XSD_SIMPLE_TYPE, XSD_ANNOTATION @@ -139,6 +140,34 @@ class TestHelpers(unittest.TestCase): elem.append(etree_element(XSD_SIMPLE_TYPE)) self.assertEqual(component._parse_child_component(elem), elem[2]) + def test_count_digits_function(self): + self.assertEqual(count_digits(10), (2, 0)) + self.assertEqual(count_digits(-10), (2, 0)) + + self.assertEqual(count_digits(081.2), (2, 1)) + self.assertEqual(count_digits(-081.200), (2, 1)) + self.assertEqual(count_digits(0.51), (0, 2)) + self.assertEqual(count_digits(-0.510), (0, 2)) + self.assertEqual(count_digits(-0.510), (0, 2)) + + self.assertEqual(count_digits(decimal.Decimal('100.0')), (3, 0)) + self.assertEqual(count_digits(decimal.Decimal('100.01')), (3, 2)) + self.assertEqual(count_digits('100.01'), (3, 2)) + + self.assertEqual(count_digits(decimal.Decimal('100.0E+4')), (7, 0)) + self.assertEqual(count_digits(decimal.Decimal('100.00001E+4')), (7, 1)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E4')), (7, 0)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E12')), (15, 0)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E19')), (22, 0)) + + self.assertEqual(count_digits(decimal.Decimal('100.0E-4')), (0, 2)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-4')), (0, 2)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-8')), (0, 6)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-9')), (0, 7)) + 
self.assertEqual(count_digits(decimal.Decimal('0100.00E-12')), (0, 10)) + self.assertEqual(count_digits(decimal.Decimal('100.10E-4')), (0, 5)) + self.assertEqual(count_digits(decimal.Decimal('0100.10E-12')), (0, 13)) + class TestElementTreeHelpers(unittest.TestCase): diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index 2a3bd7a..31ea622 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -13,6 +13,7 @@ This module contains declarations and classes for XML Schema constraint facets. """ from __future__ import unicode_literals import re +import operator from elementpath import XPath2Parser, ElementPathError, datatypes from ..compat import unicode_type, MutableSequence @@ -20,6 +21,7 @@ from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION XSD_WHITE_SPACE, XSD_PATTERN, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, XSD_MIN_INCLUSIVE, \ XSD_MIN_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_ASSERTION, \ XSD_EXPLICIT_TIMEZONE, XSD_NOTATION_TYPE, XSD_BASE64_BINARY, XSD_HEX_BINARY, XSD_QNAME +from ..helpers import count_digits from ..regex import get_python_regex from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError @@ -428,8 +430,10 @@ class XsdTotalDigitsFacet(XsdFacet): self.validator = self.total_digits_validator def total_digits_validator(self, x): - if len([d for d in str(x).strip('0') if d.isdigit()]) > self.value: - yield XMLSchemaValidationError(self, x, "the number of digits is greater than %r." % self.value) + if operator.add(*count_digits(x)) > self.value: + yield XMLSchemaValidationError( + self, x, "the number of digits is greater than %r." 
% self.value + ) class XsdFractionDigitsFacet(XsdFacet): @@ -460,8 +464,10 @@ class XsdFractionDigitsFacet(XsdFacet): self.validator = self.fraction_digits_validator def fraction_digits_validator(self, x): - if len(str(x).strip('0').partition('.')[2]) > self.value: - yield XMLSchemaValidationError(self, x, "the number of fraction digits is greater than %r." % self.value) + if count_digits(x)[1] > self.value: + yield XMLSchemaValidationError( + self, x, "the number of fraction digits is greater than %r." % self.value + ) class XsdExplicitTimezoneFacet(XsdFacet): diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py index 1fabe9c..3b39f74 100644 --- a/xmlschema/validators/globals_.py +++ b/xmlschema/validators/globals_.py @@ -489,6 +489,10 @@ class XsdGlobals(XsdValidator): for group in schema.iter_components(XsdGroup): group.build() + # Builds xs:keyref's key references + for constraint in filter(lambda x: isinstance(x, XsdKeyref), self.identities.values()): + constraint.parse_refer() + # Build XSD 1.1 identity references and assertions if self.xsd_version != '1.0': for schema in filter(lambda x: x.meta_schema is not None, not_built_schemas): @@ -510,10 +514,6 @@ class XsdGlobals(XsdValidator): for assertion in schema.iter_components(XsdAssert): assertion.parse_xpath_test() - # Builds xs:keyref's key references - for constraint in filter(lambda x: isinstance(x, XsdKeyref), self.identities.values()): - constraint.parse_refer() - self.check(filter(lambda x: x.meta_schema is not None, not_built_schemas), self.validation) def check(self, schemas=None, validation='strict'): diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 61b84f2..332fb72 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -294,7 +294,7 @@ class XsdKeyref(XsdIdentity): @property def built(self): - return self.selector is not None and self.refer is not None + return self.selector is not None and 
isinstance(self.refer, XsdIdentity) def get_refer_values(self, elem): values = set() From b1663c5550d60bb24b5b6126c11fe46580cfe8b7 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 21 Sep 2019 09:11:58 +0200 Subject: [PATCH 17/36] Add root_type property to XsdType - Atomic's property primitive_type transformed to attribute - Optimized base_type assignement --- xmlschema/tests/test_w3c_suite.py | 3 ++ xmlschema/validators/attributes.py | 4 ++ xmlschema/validators/complex_types.py | 36 ++++++++++++------ xmlschema/validators/elements.py | 5 ++- xmlschema/validators/identities.py | 29 +++++++------- xmlschema/validators/schema.py | 15 +++++--- xmlschema/validators/simple_types.py | 46 +++++++++------------- xmlschema/validators/xsdbase.py | 55 ++++++++++++++++++++------- 8 files changed, 121 insertions(+), 72 deletions(-) diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index 7294758..4ad14a8 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -107,6 +107,9 @@ SKIPPED_TESTS = { '../saxonData/XmlVersions/xv006.n02.xml', # 14855: invalid character 𐀀 (valid in XML 1.1) '../saxonData/XmlVersions/xv008.v01.xml', # 14857 '../saxonData/XmlVersions/xv008.n01.xml', # 14857 + + # Skip for TODO + '../sunData/combined/005/test.1.v.xml', # 3959: is valid but needs equality operators (#cos-ct-derived-ok) } XSD11_SKIPPED_TESTS = { diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index 43665f6..f2eb2b3 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -536,6 +536,10 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): self.clear() self._attribute_group.update(attributes) + if None in self._attribute_group and None not in attributes and self.derivation == 'restriction': + wildcard = self._attribute_group[None].copy() + wildcard.namespace = wildcard.not_namespace = wildcard.not_qname = () + 
self._attribute_group[None] = wildcard if self.xsd_version == '1.0': has_key = False diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 51d9bea..f56c685 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -56,7 +56,6 @@ class XsdComplexType(XsdType, ValidationMixin): _ADMITTED_TAGS = {XSD_COMPLEX_TYPE, XSD_RESTRICTION} _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE} _block = None - _derivation = None @staticmethod def normalize(text): @@ -149,11 +148,20 @@ class XsdComplexType(XsdType, ValidationMixin): if derivation_elem is None: return - self.base_type = self._parse_base_type(derivation_elem) + self.base_type = base_type = self._parse_base_type(derivation_elem) + + block = base_type.block + if self._block is None and block: + self._block = block + if derivation_elem.tag == XSD_RESTRICTION: - self._parse_simple_content_restriction(derivation_elem, self.base_type) + self._parse_simple_content_restriction(derivation_elem, base_type) + if base_type.blocked or 'restriction' in block and base_type != self: + self.blocked = True else: - self._parse_simple_content_extension(derivation_elem, self.base_type) + self._parse_simple_content_extension(derivation_elem, base_type) + if base_type.blocked or 'extension' in block and base_type != self: + self.blocked = True if content_elem is not elem[-1]: k = 2 if content_elem is not elem[0] else 1 @@ -182,10 +190,18 @@ class XsdComplexType(XsdType, ValidationMixin): elif self.redefine: self.base_type = self.redefine + block = base_type.block + if self._block is None and block: + self._block = block + if derivation_elem.tag == XSD_RESTRICTION: self._parse_complex_content_restriction(derivation_elem, base_type) + if base_type.blocked or 'restriction' in block and base_type != self: + self.blocked = True else: self._parse_complex_content_extension(derivation_elem, base_type) + if base_type.blocked or 'extension' in 
block and base_type != self: + self.blocked = True if content_elem is not elem[-1]: k = 2 if content_elem is not elem[0] else 1 @@ -232,8 +248,8 @@ class XsdComplexType(XsdType, ValidationMixin): return derivation = local_name(derivation_elem.tag) - if self._derivation is None: - self._derivation = derivation == 'extension' + if self.derivation is None: + self.derivation = derivation elif self.redefine is None: raise XMLSchemaValueError("%r is expected to have a redefined/overridden component" % self) @@ -555,15 +571,11 @@ class XsdComplexType(XsdType, ValidationMixin): else: return self.has_simple_content() or self.mixed and self.is_emptiable() - @property - def derivation(self): - return 'extension' if self._derivation else 'restriction' if self._derivation is False else None - def has_restriction(self): - return self._derivation is False + return self.derivation == 'restriction' def has_extension(self): - return self._derivation is True + return self.derivation == 'extension' def text_decode(self, text): if self.has_simple_content(): diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 894f4ac..b602709 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -467,6 +467,9 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) except (KeyError, TypeError) as err: yield self.validation_error(validation, err, elem, **kwargs) + if xsd_type.is_blocked(self.block): + yield self.validation_error(validation, "usage of %r is blocked" % xsd_type, elem, **kwargs) + # Decode attributes attribute_group = self.get_attributes(xsd_type) for result in attribute_group.iter_decode(elem.attrib, validation, level=level, **kwargs): @@ -572,7 +575,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) for constraint in self.identities.values(): if isinstance(constraint, XsdKeyref) and '_no_deep' in kwargs: # TODO: Complete lazy validation continue - for error in 
constraint(elem): + for error in constraint(elem, converter): yield self.validation_error(validation, error, elem, **kwargs) def iter_encode(self, obj, validation='lax', converter=None, level=0, **kwargs): diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 332fb72..8623c4b 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -17,8 +17,8 @@ from collections import Counter from elementpath import Selector, XPath1Parser, ElementPathError from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_ANNOTATION, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSD_SELECTOR, XSD_FIELD -from ..helpers import get_qname, qname_to_prefixed +from ..qnames import XSD_ANNOTATION, XSD_QNAME, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSD_SELECTOR, XSD_FIELD +from ..helpers import get_qname, qname_to_prefixed, qname_to_extended from ..etree import etree_getpath from ..regex import get_python_regex @@ -148,7 +148,7 @@ class XsdIdentity(XsdComponent): for xsd_element in self.selector.xpath_selector.iter_select(self.parent): yield xsd_element - def get_fields(self, context, decoders=None): + def get_fields(self, context, namespaces=None, decoders=None): """ Get fields for a schema or instance context element. @@ -170,6 +170,8 @@ class XsdIdentity(XsdComponent): fields.append(result[0]) else: value = decoders[k].data_value(result[0]) + if decoders[k].type.root_type.name == XSD_QNAME: + value = qname_to_extended(value, namespaces) if isinstance(value, list): fields.append(tuple(value)) else: @@ -178,11 +180,12 @@ class XsdIdentity(XsdComponent): raise XMLSchemaValueError("%r field selects multiple values!" % field) return tuple(fields) - def iter_values(self, elem): + def iter_values(self, elem, namespaces): """ Iterate field values, excluding empty values (tuples with all `None` values). - :param elem: Instance XML element. + :param elem: instance XML element. + :param namespaces: XML document namespaces. 
:return: N-Tuple with value fields. """ current_path = '' @@ -201,7 +204,7 @@ class XsdIdentity(XsdComponent): continue try: - fields = self.get_fields(e, decoders=xsd_fields) + fields = self.get_fields(e, namespaces, decoders=xsd_fields) except XMLSchemaValueError as err: yield XMLSchemaValidationError(self, e, reason=str(err)) else: @@ -212,9 +215,9 @@ class XsdIdentity(XsdComponent): def built(self): return self.selector is not None - def __call__(self, elem): + def __call__(self, elem, namespaces): values = Counter() - for v in self.iter_values(elem): + for v in self.iter_values(elem, namespaces): if isinstance(v, XMLSchemaValidationError): yield v else: @@ -296,27 +299,27 @@ class XsdKeyref(XsdIdentity): def built(self): return self.selector is not None and isinstance(self.refer, XsdIdentity) - def get_refer_values(self, elem): + def get_refer_values(self, elem, namespaces): values = set() for e in elem.iterfind(self.refer_path): - for v in self.refer.iter_values(e): + for v in self.refer.iter_values(e, namespaces): if not isinstance(v, XMLSchemaValidationError): values.add(v) return values - def __call__(self, elem): + def __call__(self, elem, namespaces): if self.refer is None: return refer_values = None - for v in self.iter_values(elem): + for v in self.iter_values(elem, namespaces): if isinstance(v, XMLSchemaValidationError): yield v continue if refer_values is None: try: - refer_values = self.get_refer_values(elem) + refer_values = self.get_refer_values(elem, namespaces) except XMLSchemaValueError as err: yield XMLSchemaValidationError(self, elem, str(err)) continue diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index e79825d..5bcf80c 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -299,12 +299,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.parse_error(err, root) if 'blockDefault' in root.attrib: - try: - self.block_default = get_xsd_derivation_attribute( - 
root, 'blockDefault', {'extension', 'restriction', 'substitution'} - ) - except ValueError as err: - self.parse_error(err, root) + if self.meta_schema is None: + pass # Skip XSD 1.0 meta-schema that has blockDefault="#all" + else: + try: + self.block_default = get_xsd_derivation_attribute( + root, 'blockDefault', {'extension', 'restriction', 'substitution'} + ) + except ValueError as err: + self.parse_error(err, root) if 'finalDefault' in root.attrib: try: diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index b2d3da2..dcd7284 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -411,7 +411,10 @@ class XsdAtomic(XsdSimpleType): _ADMITTED_TAGS = {XSD_RESTRICTION, XSD_SIMPLE_TYPE} def __init__(self, elem, schema, parent, name=None, facets=None, base_type=None): - self.base_type = base_type + if base_type is None: + self.primitive_type = self + else: + self.base_type = base_type super(XsdAtomic, self).__init__(elem, schema, parent, name, facets) def __repr__(self): @@ -421,38 +424,27 @@ class XsdAtomic(XsdSimpleType): return '%s(name=%r)' % (self.__class__.__name__, self.prefixed_name) def __setattr__(self, name, value): - if name == 'base_type' and value is not None and not isinstance(value, XsdType): - raise XMLSchemaValueError("%r attribute must be an XsdType instance or None: %r" % (name, value)) super(XsdAtomic, self).__setattr__(name, value) - if name in ('base_type', 'white_space'): - if getattr(self, 'white_space', None) is None: + if name == 'base_type': + assert isinstance(value, XsdType) + if not hasattr(self, 'white_space'): try: - white_space = self.base_type.white_space + self.white_space = self.base_type.white_space except AttributeError: - return + pass + try: + if value.is_simple(): + self.primitive_type = self.base_type.primitive_type else: - if white_space is not None: - self.white_space = white_space + self.primitive_type = self.base_type.content_type.primitive_type 
+ except AttributeError: + self.primitive_type = value @property def admitted_facets(self): - primitive_type = self.primitive_type - if primitive_type is None or primitive_type.is_complex(): + if self.primitive_type.is_complex(): return XSD_10_FACETS if self.xsd_version == '1.0' else XSD_11_FACETS - return primitive_type.admitted_facets - - @property - def primitive_type(self): - if self.base_type is None: - return self - try: - if self.base_type.is_simple(): - return self.base_type.primitive_type - else: - return self.base_type.content_type.primitive_type - except AttributeError: - # The base_type is XsdList or XsdUnion. - return self.base_type + return self.primitive_type.admitted_facets def get_facet(self, tag): try: @@ -479,8 +471,8 @@ class XsdAtomicBuiltin(XsdAtomic): - to_python(value): Decoding from XML - from_python(value): Encoding to XML """ - def __init__(self, elem, schema, name, python_type, base_type=None, admitted_facets=None, facets=None, - to_python=None, from_python=None): + def __init__(self, elem, schema, name, python_type, base_type=None, admitted_facets=None, + facets=None, to_python=None, from_python=None): """ :param name: the XSD type's qualified name. :param python_type: the correspondent Python's type. 
If a tuple or list of types \ diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index fe623f9..a745772 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -568,6 +568,8 @@ class XsdType(XsdComponent): """Common base class for XSD types.""" abstract = False + blocked = False + block = '' base_type = None derivation = None redefine = None @@ -581,6 +583,34 @@ class XsdType(XsdComponent): def built(self): raise NotImplementedError + @property + def content_type_label(self): + if self.is_empty(): + return 'empty' + elif self.has_simple_content(): + return 'simple' + elif self.is_element_only(): + return 'element-only' + elif self.has_mixed_content(): + return 'mixed' + else: + return 'unknown' + + @property + def root_type(self): + """The root type of the type definition hierarchy. Is itself for a root type.""" + if self.base_type is None: + return self # Note that a XsdUnion type is always considered a root type + + try: + if self.base_type.is_simple(): + return self.base_type.primitive_type + else: + return self.base_type.content_type.primitive_type + except AttributeError: + # The type has complex or XsdList content + return self.base_type + @staticmethod def is_simple(): """Returns `True` if the instance is a simpleType, `False` otherwise.""" @@ -623,22 +653,21 @@ class XsdType(XsdComponent): """ raise NotImplementedError - @property - def content_type_label(self): - if self.is_empty(): - return 'empty' - elif self.has_simple_content(): - return 'simple' - elif self.is_element_only(): - return 'element-only' - elif self.has_mixed_content(): - return 'mixed' - else: - return 'unknown' - def is_derived(self, other, derivation=None): raise NotImplementedError + def is_blocked(self, block=''): + if self.blocked: + return True + elif not block: + return False + elif self.derivation and self.derivation in block: + return True + elif self.base_type is None: + return False + else: + return 
self.base_type.is_blocked(block) + def is_dynamic_consistent(self, other): return self.is_derived(other) or hasattr(other, 'member_types') and \ any(self.is_derived(mt) for mt in other.member_types) From 8d56d128cac5680258810cde8d7ab316ae27b076 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Mon, 23 Sep 2019 09:06:54 +0200 Subject: [PATCH 18/36] Add XSD 1.1 inheritable check on XML instance validation - Add substitutes to dynamic checks --- xmlschema/validators/assertions.py | 4 +-- xmlschema/validators/complex_types.py | 2 +- xmlschema/validators/elements.py | 52 +++++++++++++++++++++------ xmlschema/validators/groups.py | 26 +++++++++----- xmlschema/validators/schema.py | 7 ++-- xmlschema/validators/wildcards.py | 2 ++ 6 files changed, 69 insertions(+), 24 deletions(-) diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 9cdb581..4dd6dc1 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -65,8 +65,8 @@ class XsdAssert(XsdComponent, ElementPathMixin): self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - self.parser = XPath2Parser(self.namespaces, strict=False, variables=variables, - default_namespace=self.xpath_default_namespace) + self.parser = XPath2Parser(self.namespaces, variables, False, + self.xpath_default_namespace, schema=self.xpath_proxy) @property def built(self): diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index f56c685..56c7ebf 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -718,7 +718,7 @@ class Xsd11ComplexType(XsdComplexType): # Add inheritable attributes if hasattr(self.base_type, 'attributes'): for name, attr in self.base_type.attributes.items(): - if name and attr.inheritable: + if attr.inheritable: if name not in self.attributes: self.attributes[name] = attr elif not 
self.attributes[name].inheritable: diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index b602709..096b597 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -65,8 +65,9 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) """ type = None - alternatives = () qualified = False + alternatives = () + inheritable = () _ADMITTED_TAGS = {XSD_ELEMENT} _abstract = False @@ -382,7 +383,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) return self.type.attributes[get_qname(self.type.target_namespace, name)] return self.type.attributes[name] - def get_type(self, elem): + def get_type(self, elem, inherited=None): return self.type def get_attributes(self, xsd_type): @@ -455,12 +456,16 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) :return: yields a decoded object, eventually preceded by a sequence of \ validation or decoding errors. 
""" + if self.abstract: + yield self.validation_error(validation, "cannot use an abstract element for validation", elem, **kwargs) + if not isinstance(converter, XMLSchemaConverter): converter = self.schema.get_converter(converter, level=level, **kwargs) + inherited = kwargs.get('inherited') value = content = attributes = None # Get the instance effective type - xsd_type = self.get_type(elem) + xsd_type = self.get_type(elem, inherited) if XSI_TYPE in elem.attrib: try: xsd_type = xsd_type.get_instance_type(elem.attrib, converter) @@ -478,6 +483,14 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) else: attributes = result + if self.inheritable and any(name in self.inheritable for name in elem.attrib): + if inherited: + inherited = inherited.copy() + inherited.update((k, v) for k, v in elem.attrib.items() if k in self.inheritable) + else: + inherited = {k: v for k, v in elem.attrib.items() if k in self.inheritable} + kwargs['inherited'] = inherited + # Checks the xsi:nil attribute of the instance if XSI_NIL in elem.attrib: xsi_nil = elem.attrib[XSI_NIL].strip() @@ -822,10 +835,16 @@ class Xsd11Element(XsdElement): index = self._parse_type() index = self._parse_alternatives(index) self._parse_identity_constraints(index) + if self.parent is None and 'substitutionGroup' in self.elem.attrib: for substitution_group in self.elem.attrib['substitutionGroup'].split(): self._parse_substitution_group(substitution_group) + self._parse_target_namespace() + + if any(v.inheritable for v in self.attributes.values()): + self.inheritable = {k: v for k, v in self.attributes.items() if v.inheritable} + self.xpath_proxy = XMLSchemaProxy(self.schema, self) def _parse_alternatives(self, index=0): @@ -886,7 +905,7 @@ class Xsd11Element(XsdElement): for e in xsd_element.iter_substitutes(): yield e - def get_type(self, elem): + def get_type(self, elem, inherited=None): if not self.alternatives: return self.type @@ -897,11 +916,16 @@ class 
Xsd11Element(XsdElement): else: elem = etree_element(elem.tag) - for alt in filter(lambda x: x.type is not None, self.alternatives): - if alt.token is None: - return alt.type - elif alt.token.boolean_value(list(alt.token.select(context=XPathContext(root=elem)))): - return alt.type + if inherited: + dummy = etree_element('_dummy_element', attrib=inherited) + + for alt in filter(lambda x: x.type is not None, self.alternatives): + if alt.token is None or alt.test(elem) or alt.test(dummy): + return alt.type + else: + for alt in filter(lambda x: x.type is not None, self.alternatives): + if alt.token is None or alt.test(elem): + return alt.type return self.type @@ -992,7 +1016,9 @@ class XsdAlternative(XsdComponent): self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - parser = XPath2Parser(self.namespaces, strict=False, default_namespace=self.xpath_default_namespace) + parser = XPath2Parser( + self.namespaces, strict=False, default_namespace=self.xpath_default_namespace + ) try: self.path = attrib['test'] @@ -1050,3 +1076,9 @@ class XsdAlternative(XsdComponent): if self.type is not None and self.type.parent is not None: for obj in self.type.iter_components(xsd_classes): yield obj + + def test(self, elem): + try: + return self.token.boolean_value(list(self.token.select(context=XPathContext(elem)))) + except TypeError: + return False diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index c3bfb69..4f1044d 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -481,7 +481,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: return other_max_occurs >= max_occurs * self.max_occurs - def check_dynamic_context(self, elem, xsd_element, converter): + def check_dynamic_context(self, elem, xsd_element, model_element, converter): if isinstance(xsd_element, XsdAnyElement): if xsd_element.process_contents == 
'skip': return @@ -501,14 +501,24 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): except KeyError: xsd_type = xsd_element.type - elif XSI_TYPE not in elem.attrib: - return else: - alternatives = xsd_element.alternatives - try: - xsd_type = xsd_element.type.get_instance_type(elem.attrib, converter) - except KeyError: + if XSI_TYPE not in elem.attrib: xsd_type = xsd_element.type + else: + alternatives = xsd_element.alternatives + try: + xsd_type = xsd_element.type.get_instance_type(elem.attrib, converter) + except KeyError: + xsd_type = xsd_element.type + + if model_element is not xsd_element and model_element.block: + for derivation in model_element.block.split(): + if xsd_type.is_derived(model_element.type, derivation): + reason = "usage of %r with type %s is blocked by head element" + raise XMLSchemaValidationError(self, reason % (xsd_element, derivation)) + + if XSI_TYPE not in elem.attrib: + return # If it's a restriction the context is the base_type's group group = self.restriction if self.restriction is not None else self @@ -602,7 +612,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): break try: - self.check_dynamic_context(child, xsd_element, converter) + self.check_dynamic_context(child, xsd_element, model.element, converter) except XMLSchemaValidationError as err: yield self.validation_error(validation, err, elem, **kwargs) diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 5bcf80c..2bf259a 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -1162,6 +1162,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): namespaces.update(source.get_namespaces()) id_map = Counter() + inherited = {} if source.is_lazy() and path is None: # TODO: Document validation in lazy mode. 
@@ -1172,8 +1173,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): yield self.validation_error('lax', "%r is not an element of the schema" % source.root, source.root) for result in xsd_element.iter_decode(source.root, source=source, namespaces=namespaces, - use_defaults=use_defaults, id_map=id_map, - no_depth=True, drop_results=True): + use_defaults=use_defaults, id_map=id_map, no_depth=True, + inherited=inherited, drop_results=True): if isinstance(result, XMLSchemaValidationError): yield result else: @@ -1190,7 +1191,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for result in xsd_element.iter_decode(elem, source=source, namespaces=namespaces, use_defaults=use_defaults, id_map=id_map, - drop_results=True): + inherited=inherited, drop_results=True): if isinstance(result, XMLSchemaValidationError): yield result else: diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 981bd2d..7f818f6 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -686,6 +686,8 @@ class Xsd11AnyAttribute(XsdAnyAttribute): Content: (annotation?) """ + inheritable = False # Added for reduce checkings on XSD 1.1 attributes + def _parse(self): super(Xsd11AnyAttribute, self)._parse() self._parse_not_constraints() From b6c6e2ac8fe195a203ef314d02a08a12660df202 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Mon, 23 Sep 2019 17:29:08 +0200 Subject: [PATCH 19/36] Change get_context() in document module - Now the optional schema argument is a fallbak in case the schema path is not found into XML resource. 
--- xmlschema/documents.py | 12 ++++++++---- xmlschema/validators/elements.py | 2 +- xmlschema/validators/groups.py | 2 +- xmlschema/validators/schema.py | 4 +++- xmlschema/validators/simple_types.py | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/xmlschema/documents.py b/xmlschema/documents.py index bc66718..439a8c9 100644 --- a/xmlschema/documents.py +++ b/xmlschema/documents.py @@ -25,12 +25,16 @@ def get_context(source, schema=None, cls=None, locations=None, base_url=None, if cls is None: cls = XMLSchema - if schema is None: + try: schema, locations = fetch_schema_locations(source, locations, base_url=base_url) + except ValueError: + if schema is None: + raise + elif not isinstance(schema, XMLSchemaBase): + schema = cls(schema, validation='strict', locations=locations, base_url=base_url, + defuse=defuse, timeout=timeout) + else: schema = cls(schema, validation='strict', locations=locations, defuse=defuse, timeout=timeout) - elif not isinstance(schema, XMLSchemaBase): - schema = cls(schema, validation='strict', locations=locations, base_url=base_url, - defuse=defuse, timeout=timeout) if not isinstance(source, XMLResource): source = XMLResource(source, defuse=defuse, timeout=timeout, lazy=lazy) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 096b597..49bd354 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -557,7 +557,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) if 'filler' in kwargs: value = kwargs['filler'](self) else: - if level == 0: + if level == 0 or self.xsd_version != '1.0': kwargs['_skip_id'] = True for result in xsd_type.iter_decode(text, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 4f1044d..10a74db 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -516,7 +516,7 @@ class 
XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if xsd_type.is_derived(model_element.type, derivation): reason = "usage of %r with type %s is blocked by head element" raise XMLSchemaValidationError(self, reason % (xsd_element, derivation)) - + if XSI_TYPE not in elem.attrib: return diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 2bf259a..ddb1b51 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -1259,6 +1259,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): converter = self.get_converter(converter, namespaces, **kwargs) id_map = Counter() + inherited = {} + if decimal_type is not None: kwargs['decimal_type'] = decimal_type if filler is not None: @@ -1272,7 +1274,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for obj in xsd_element.iter_decode( elem, validation, converter=converter, source=source, namespaces=namespaces, use_defaults=use_defaults, datetime_types=datetime_types, - fill_missing=fill_missing, id_map=id_map, **kwargs): + fill_missing=fill_missing, id_map=id_map, inherited=inherited, **kwargs): yield obj for k, v in id_map.items(): diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index dcd7284..63111d7 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -516,7 +516,7 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.decode_error(validation, obj, self.to_python, reason="value is not an instance of {!r}".format(self.instance_types)) - if self.name == XSD_ID and '_skip_id' not in kwargs: + if self.name == XSD_ID: try: id_map = kwargs['id_map'] except KeyError: @@ -527,7 +527,7 @@ class XsdAtomicBuiltin(XsdAtomic): except TypeError: id_map[obj] = 1 - if id_map[obj] > 1: + if id_map[obj] > 1 and '_skip_id' not in kwargs: yield self.validation_error(validation, "Duplicated xsd:ID value {!r}".format(obj)) elif self.name == XSD_IDREF: From 
844ddec3bae6e64491eb1038567de7f84692e218 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 1 Oct 2019 06:44:31 +0200 Subject: [PATCH 20/36] Fix assertions - Add custom parser for assertion facet (without position() and last()). - Move parser initialization to XsdAssert.parse_xpath_test() because all the components must be defined. --- requirements-dev.txt | 2 +- setup.py | 2 +- tox.ini | 4 +- xmlschema/tests/test_xpath.py | 44 ++++++++++---------- xmlschema/validators/assertions.py | 58 ++++++++++++++++----------- xmlschema/validators/complex_types.py | 10 ++--- xmlschema/validators/facets.py | 23 ++++++++++- xmlschema/validators/schema.py | 4 +- xmlschema/xpath.py | 11 +++-- 9 files changed, 96 insertions(+), 62 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index d1363d4..83dfcbd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ setuptools tox coverage -elementpath~=1.2.0 +elementpath~=1.3.0 lxml memory_profiler pathlib2 # For Py27 tests on resources diff --git a/setup.py b/setup.py index a4dc551..603a73e 100755 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ class InstallCommand(install): setup( name='xmlschema', version='1.0.15', - install_requires=['elementpath~=1.2.0'], + install_requires=['elementpath~=1.3.0'], packages=['xmlschema'], include_package_data=True, cmdclass={ diff --git a/tox.ini b/tox.ini index 90b6ff7..2497e14 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ toxworkdir = {homedir}/.tox/xmlschema [testenv] deps = lxml - elementpath~=1.2.0 + elementpath~=1.3.0 py27: pathlib2 memory: memory_profiler docs: Sphinx @@ -25,7 +25,7 @@ whitelist_externals = make [testenv:py38] deps = lxml==4.3.5 - elementpath~=1.2.0 + elementpath~=1.3.0 [testenv:package] commands = python xmlschema/tests/test_package.py diff --git a/xmlschema/tests/test_xpath.py b/xmlschema/tests/test_xpath.py index dacae8f..1a99781 100644 --- a/xmlschema/tests/test_xpath.py +++ b/xmlschema/tests/test_xpath.py @@ -45,43 
+45,43 @@ class XsdXPathTest(unittest.TestCase): self.assertTrue(self.xs1.findall('.')) self.assertTrue(isinstance(self.xs1.find('.'), XMLSchema)) self.assertTrue(sorted(self.xs1.findall("*"), key=lambda x: x.name) == elements) - self.assertTrue(self.xs1.findall("*") == self.xs1.findall("./*")) - self.assertTrue(self.xs1.find("./vh:bikes") == self.xs1.elements['bikes']) - self.assertTrue(self.xs1.find("./vh:vehicles/vh:cars").name == self.xs1.elements['cars'].name) - self.assertFalse(self.xs1.find("./vh:vehicles/vh:cars") == self.xs1.elements['cars']) - self.assertFalse(self.xs1.find("/vh:vehicles/vh:cars") == self.xs1.elements['cars']) - self.assertTrue(self.xs1.find("vh:vehicles/vh:cars/..") == self.xs1.elements['vehicles']) - self.assertTrue(self.xs1.find("vh:vehicles/*/..") == self.xs1.elements['vehicles']) - self.assertTrue(self.xs1.find("vh:vehicles/vh:cars/../vh:cars") == self.xs1.find("vh:vehicles/vh:cars")) + self.assertListEqual(self.xs1.findall("*"), self.xs1.findall("./*")) + self.assertEqual(self.xs1.find("./vh:bikes"), self.xs1.elements['bikes']) + self.assertEqual(self.xs1.find("./vh:vehicles/vh:cars").name, self.xs1.elements['cars'].name) + self.assertNotEqual(self.xs1.find("./vh:vehicles/vh:cars"), self.xs1.elements['cars']) + self.assertNotEqual(self.xs1.find("/vh:vehicles/vh:cars"), self.xs1.elements['cars']) + self.assertEqual(self.xs1.find("vh:vehicles/vh:cars/.."), self.xs1.elements['vehicles']) + self.assertEqual(self.xs1.find("vh:vehicles/*/.."), self.xs1.elements['vehicles']) + self.assertEqual(self.xs1.find("vh:vehicles/vh:cars/../vh:cars"), self.xs1.find("vh:vehicles/vh:cars")) def test_xpath_axis(self): - self.assertTrue(self.xs1.find("vh:vehicles/child::vh:cars/..") == self.xs1.elements['vehicles']) + self.assertEqual(self.xs1.find("vh:vehicles/child::vh:cars/.."), self.xs1.elements['vehicles']) def test_xpath_subscription(self): - self.assertTrue(len(self.xs1.findall("./vh:vehicles/*")) == 2) - 
self.assertTrue(self.xs1.findall("./vh:vehicles/*[2]") == [self.bikes]) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[3]") == []) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[last()-1]") == [self.cars]) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[position()=last()]") == [self.bikes]) + self.assertEqual(len(self.xs1.findall("./vh:vehicles/*")), 2) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[2]"), [self.bikes]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[3]"), []) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[last()-1]"), [self.cars]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[position()=last()]"), [self.bikes]) def test_xpath_group(self): - self.assertTrue(self.xs1.findall("/(vh:vehicles/*/*)") == self.xs1.findall("/vh:vehicles/*/*")) - self.assertTrue(self.xs1.findall("/(vh:vehicles/*/*)[1]") == self.xs1.findall("/vh:vehicles/*/*[1]")) + self.assertEqual(self.xs1.findall("/(vh:vehicles/*/*)"), self.xs1.findall("/vh:vehicles/*/*")) + self.assertEqual(self.xs1.findall("/(vh:vehicles/*/*)[1]"), self.xs1.findall("/vh:vehicles/*/*[1]")[:1]) def test_xpath_predicate(self): car = self.xs1.elements['cars'].type.content_type[0] - self.assertTrue(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]") == [car]) - self.assertTrue(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]") == [car]) - self.assertTrue(self.xs1.findall("./vh:vehicles/vh:cars['ciao']") == [self.cars]) - self.assertTrue(self.xs1.findall("./vh:vehicles/*['']") == []) + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]"), [car]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]"), [car]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars['ciao']"), [self.cars]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*['']"), []) def test_xpath_descendants(self): selector = Selector('.//xs:element', self.xs2.namespaces, parser=XPath1Parser) elements = 
list(selector.iter_select(self.xs2.root)) - self.assertTrue(len(elements) == 14) + self.assertEqual(len(elements), 14) selector = Selector('.//xs:element|.//xs:attribute|.//xs:keyref', self.xs2.namespaces, parser=XPath1Parser) elements = list(selector.iter_select(self.xs2.root)) - self.assertTrue(len(elements) == 17) + self.assertEqual(len(elements), 17) def test_xpath_issues(self): namespaces = {'ps': "http://schemas.microsoft.com/powershell/2004/04"} diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 4dd6dc1..2225df0 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -32,48 +32,54 @@ class XsdAssert(XsdComponent, ElementPathMixin): """ _ADMITTED_TAGS = {XSD_ASSERT} token = None + parser = None + path = 'true()' def __init__(self, elem, schema, parent, base_type): self.base_type = base_type super(XsdAssert, self).__init__(elem, schema, parent) + def __repr__(self): + return '%s(test=%r)' % (self.__class__.__name__, self.path) + def _parse(self): super(XsdAssert, self)._parse() - if self.base_type.is_complex(): + if self.base_type.is_simple(): + self.parse_error("base_type=%r is not a complexType definition" % self.base_type) + else: try: self.path = self.elem.attrib['test'] except KeyError as err: self.parse_error(str(err), elem=self.elem) - self.path = 'true()' - - if not self.base_type.has_simple_content(): - variables = {'value': datatypes.XSD_BUILTIN_TYPES['anyType'].value} - else: - try: - builtin_type_name = self.base_type.content_type.primitive_type.local_name - except AttributeError: - variables = {'value': datatypes.XSD_BUILTIN_TYPES['anySimpleType'].value} - else: - variables = {'value': datatypes.XSD_BUILTIN_TYPES[builtin_type_name].value} - - else: - self.parse_error("base_type=%r is not a complexType definition" % self.base_type) - self.path = 'true()' - variables = None if 'xpathDefaultNamespace' in self.elem.attrib: self.xpath_default_namespace = 
self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - self.parser = XPath2Parser(self.namespaces, variables, False, - self.xpath_default_namespace, schema=self.xpath_proxy) + + self.xpath_proxy = XMLSchemaProxy(self.schema, self) @property def built(self): return self.token is not None and (self.base_type.parent is None or self.base_type.built) def parse_xpath_test(self): - self.parser.schema = XMLSchemaProxy(self.schema, self) + if self.base_type.has_simple_content(): + variables = {'value': datatypes.XSD_BUILTIN_TYPES['anyType'].value} + elif self.base_type.is_complex(): + try: + builtin_type_name = self.base_type.content_type.primitive_type.local_name + except AttributeError: + variables = {'value': datatypes.XSD_BUILTIN_TYPES['anySimpleType'].value} + else: + variables = {'value': datatypes.XSD_BUILTIN_TYPES[builtin_type_name].value} + else: + variables = None + + self.parser = XPath2Parser( + self.namespaces, variables, False, self.xpath_default_namespace, schema=self.xpath_proxy + ) + try: self.token = self.parser.parse(self.path) except ElementPathError as err: @@ -81,10 +87,16 @@ class XsdAssert(XsdComponent, ElementPathMixin): self.token = self.parser.parse('true()') def __call__(self, elem, value=None, source=None, **kwargs): - self.parser.variables['value'] = value - root = elem if source is None else source.root + if value is not None: + self.parser.variables['value'] = self.base_type.text_decode(value) + + if source is None: + context = XPathContext(root=elem) + else: + context = XPathContext(root=source.root, item=elem) + try: - if not self.token.evaluate(XPathContext(root=root, item=elem)): + if not self.token.evaluate(context.copy()): msg = "expression is not true with test path %r." 
yield XMLSchemaValidationError(self, obj=elem, reason=msg % self.path) except ElementPathError as err: diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 56c7ebf..322b458 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -546,7 +546,7 @@ class XsdComplexType(XsdType, ValidationMixin): for obj in self.base_type.iter_components(xsd_classes): yield obj - for obj in self.assertions: + for obj in filter(lambda x: x.base_type is self, self.assertions): if xsd_classes is None or isinstance(obj, xsd_classes): yield obj @@ -857,7 +857,7 @@ class Xsd11ComplexType(XsdComplexType): def _parse_content_tail(self, elem, **kwargs): self.attributes = self.schema.BUILDERS.attribute_group_class(elem, self.schema, self, **kwargs) - self.assertions = [] - for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): - if child.tag == XSD_ASSERT: - self.assertions.append(XsdAssert(child, self.schema, self, self)) + + self.assertions = [XsdAssert(e, self.schema, self, self) for e in elem if e.tag == XSD_ASSERT] + if getattr(self.base_type, 'assertions', None): + self.assertions.extend(assertion for assertion in self.base_type.assertions) diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index 31ea622..6aecf28 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -643,6 +643,25 @@ class XsdPatternFacets(MutableSequence, XsdFacet): return [e.get('value', '') for e in self._elements] +class XsdAssertionXPathParser(XPath2Parser): + """Parser for XSD 1.1 assertion facets.""" + +XsdAssertionXPathParser.unregister('last') +XsdAssertionXPathParser.unregister('position') + +@XsdAssertionXPathParser.method(XsdAssertionXPathParser.function('last', nargs=0)) +def evaluate(self, context=None): + self.missing_context("Context item size is undefined") + + +@XsdAssertionXPathParser.method(XsdAssertionXPathParser.function('position', nargs=0)) +def 
evaluate(self, context=None): + self.missing_context("Context item position is undefined") + + +XsdAssertionXPathParser.build_tokenizer() + + class XsdAssertionFacet(XsdFacet): """ XSD 1.1 *assertion* facet for simpleType definitions. @@ -678,8 +697,8 @@ class XsdAssertionFacet(XsdFacet): self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - self.parser = XPath2Parser(self.namespaces, strict=False, variables=variables, - default_namespace=self.xpath_default_namespace) + self.parser = XsdAssertionXPathParser(self.namespaces, strict=False, variables=variables, + default_namespace=self.xpath_default_namespace) try: self.token = self.parser.parse(self.path) diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index ddb1b51..cc3a6f1 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -814,7 +814,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): def get_element(self, tag, path=None, namespaces=None): if not path: - return self.find(tag) + return self.find(tag, namespaces) elif path[-1] == '*': return self.find(path[:-1] + tag, namespaces) else: @@ -1185,7 +1185,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): schema_path = '/%s/*' % source.root.tag for elem in source.iterfind(path, namespaces): - xsd_element = self.get_element(elem.tag, schema_path, namespaces) + xsd_element = self.get_element(elem.tag, schema_path, self.namespaces) if xsd_element is None: yield self.validation_error('lax', "%r is not an element of the schema" % elem, elem) diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index ef40fe6..e87159c 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -213,7 +213,7 @@ class ElementPathMixin(Sequence): default_namespace=self.xpath_default_namespace) root_token = parser.parse(path) context = XMLSchemaContext(self) - return 
root_token.select(context) + return root_token.select_results(context) def find(self, path, namespaces=None): """ @@ -226,14 +226,17 @@ class ElementPathMixin(Sequence): path = path.strip() if path.startswith('/') and not path.startswith('//'): path = ''.join(['/', XSD_SCHEMA, path]) + if namespaces is None: namespaces = {k: v for k, v in self.namespaces.items() if k} + namespaces[''] = self.xpath_default_namespace + elif '' not in namespaces: + namespaces[''] = self.xpath_default_namespace - parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy, - default_namespace=self.xpath_default_namespace) + parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy) root_token = parser.parse(path) context = XMLSchemaContext(self) - return next(root_token.select(context), None) + return next(root_token.select_results(context), None) def findall(self, path, namespaces=None): """ From 9afff86ee913aecd9d5862d48d69b405ac80c95a Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 2 Oct 2019 14:11:53 +0200 Subject: [PATCH 21/36] Remove validators and patterns checks for XsdList/XsdUnion - The base XsdList and XsdUnion types have no validators or patterns, that are added only to restrictions. - Added patterns optional argument to XsdUnion.iter_decode(): derived types pass our patterns to the xs:union base type, that checks them after normalization with the selected member type. 
--- xmlschema/validators/simple_types.py | 64 ++++++---------------------- 1 file changed, 13 insertions(+), 51 deletions(-) diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 63111d7..c4e5866 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -739,10 +739,6 @@ class XsdList(XsdSimpleType): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) - if validation != 'skip' and self.patterns: - for error in self.patterns(obj): - yield error - items = [] for chunk in obj.split(): for result in self.base_type.iter_decode(chunk, validation, **kwargs): @@ -751,22 +747,12 @@ class XsdList(XsdSimpleType): else: items.append(result) - if validation != 'skip': - for validator in self.validators: - for error in validator(items): - yield error - yield items def iter_encode(self, obj, validation='lax', **kwargs): if not hasattr(obj, '__iter__') or isinstance(obj, (str, unicode_type, bytes)): obj = [obj] - if validation != 'skip': - for validator in self.validators: - for error in validator(obj): - yield error - encoded_items = [] for item in obj: for result in self.base_type.iter_encode(item, validation, **kwargs): @@ -888,22 +874,15 @@ class XsdUnion(XsdSimpleType): for obj in mt.iter_components(xsd_classes): yield obj - def iter_decode(self, obj, validation='lax', **kwargs): - if isinstance(obj, (string_base_type, bytes)): - obj = self.normalize(obj) - - if validation != 'skip' and self.patterns: - for error in self.patterns(obj): - yield error - - # Try the text as a whole + def iter_decode(self, obj, validation='lax', patterns=None, **kwargs): + # Try decoding the whole text for member_type in self.member_types: for result in member_type.iter_decode(obj, validation='lax', **kwargs): if not isinstance(result, XMLSchemaValidationError): - if validation != 'skip': - for validator in self.validators: - for error in validator(result): - yield error + if validation != 'skip' 
and patterns: + obj = member_type.normalize(obj) + for error in patterns(obj): + yield error yield result return @@ -935,24 +914,12 @@ class XsdUnion(XsdSimpleType): reason = "no type suitable for decoding the values %r." % not_decodable yield self.decode_error(validation, obj, self.member_types, reason) - for validator in self.validators: - for error in validator(items): - yield error - yield items if len(items) > 1 else items[0] if items else None def iter_encode(self, obj, validation='lax', **kwargs): for member_type in self.member_types: for result in member_type.iter_encode(obj, validation='lax', **kwargs): if result is not None and not isinstance(result, XMLSchemaValidationError): - if validation != 'skip': - for validator in self.validators: - for error in validator(obj): - yield error - if self.patterns is not None: - for error in self.patterns(result): - yield error - yield result return elif validation == 'strict': @@ -965,14 +932,6 @@ class XsdUnion(XsdSimpleType): for item in obj: for result in member_type.iter_encode(item, validation='lax', **kwargs): if result is not None and not isinstance(result, XMLSchemaValidationError): - if validation != 'skip': - for validator in self.validators: - for error in validator(result): - yield error - if self.patterns is not None: - for error in self.patterns(result): - yield error - results.append(result) break elif validation == 'strict': @@ -1154,10 +1113,6 @@ class XsdAtomicRestriction(XsdAtomic): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) - if validation != 'skip' and self.patterns: - for error in self.patterns(obj): - yield error - if self.base_type.is_simple(): base_type = self.base_type elif self.base_type.has_simple_content(): @@ -1169,6 +1124,13 @@ class XsdAtomicRestriction(XsdAtomic): raise XMLSchemaValueError("wrong base type %r: a simpleType or a complexType with " "simple or mixed content required." 
% self.base_type) + if validation != 'skip' and self.patterns: + if not isinstance(self.primitive_type, XsdUnion): + for error in self.patterns(obj): + yield error + elif 'patterns' not in kwargs: + kwargs['patterns'] = self.patterns + for result in base_type.iter_decode(obj, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): yield result From 7fcacde31397838bdbb61e6389d2b270770fb907 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 3 Oct 2019 19:08:14 +0200 Subject: [PATCH 22/36] Fix issue #136 - Protect facets validators from type and value errors - Add strictly_equal() helper to check fixed value equality - Move get_instance_type() from XsdType to global maps --- xmlschema/helpers.py | 5 ++ xmlschema/tests/test_w3c_suite.py | 4 +- xmlschema/tests/validation/test_decoding.py | 46 ++++++++++++++++ xmlschema/validators/assertions.py | 1 + xmlschema/validators/complex_types.py | 11 ---- xmlschema/validators/elements.py | 26 +++++---- xmlschema/validators/facets.py | 58 +++++++++++++++------ xmlschema/validators/globals_.py | 25 +++++++-- xmlschema/validators/groups.py | 14 +++-- xmlschema/validators/simple_types.py | 21 +++----- 10 files changed, 149 insertions(+), 62 deletions(-) diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index a2c75fe..d2df751 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -213,6 +213,11 @@ def count_digits(number): return 0, num_digits - exponent - 1 +def strictly_equal(obj1, obj2): + """Checks if the objects are equal and are of the same type.""" + return obj1 == obj2 and type(obj1) is type(obj2) + + class ParticleCounter(object): """ An helper class for counting total min/max occurrences of XSD particles. 
diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index 4ad14a8..a9d4d77 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -246,7 +246,9 @@ def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 test_conf['source'] = source_path return test_conf - if args.numbers and testgroup_num not in args.numbers: + if group_num == 1: + return # Skip introspection tests that have several failures due to schema mismatch. + elif args.numbers and group_num not in args.numbers: return name = group_elem.attrib['name'] diff --git a/xmlschema/tests/validation/test_decoding.py b/xmlschema/tests/validation/test_decoding.py index 786e629..93d2050 100644 --- a/xmlschema/tests/validation/test_decoding.py +++ b/xmlschema/tests/validation/test_decoding.py @@ -316,6 +316,52 @@ class TestDecoding(XsdValidatorTestCase): xml_dict = xmlschema.to_dict(col_xml_string, self.col_schema.url, namespaces=self.col_namespaces) self.assertTrue(xml_dict, COLLECTION_DICT) + def test_date_decoding(self): + # Issue #136 + schema = xmlschema.XMLSchema(""" + + + + + + + + + + """) + + self.assertEqual(schema.to_dict("2019-01-01"), '2019-01-01') + self.assertEqual(schema.to_dict("2019-01-01", datetime_types=True), + datatypes.Date10.fromstring('2019-01-01')) + + data, errors = schema.to_dict("2019-01-01", validation='lax') + self.assertEqual(data, '2019-01-01') + self.assertEqual(errors, []) + + data, errors = schema.to_dict("2019-01-01", validation='lax', datetime_types=True) + self.assertEqual(data, datatypes.Date10.fromstring('2019-01-01')) + self.assertEqual(errors, []) + + data, errors = schema.to_dict("1999-12-31", validation='lax') + self.assertEqual(data, '1999-12-31') + self.assertEqual(len(errors), 1) + self.assertIn('value has to be greater or equal than', unicode_type(errors[0])) + + data, errors = schema.to_dict("1999-12-31", validation='lax', datetime_types=True) + self.assertEqual(data, 
datatypes.Date10.fromstring('1999-12-31')) + self.assertEqual(len(errors), 1) + + data, errors = schema.to_dict("2019", validation='lax') + self.assertIsNone(data) + self.assertEqual(len(errors), 1) + + with self.assertRaises(XMLSchemaValidationError): + schema.to_dict("2019") + + data, errors = schema.to_dict("2019", validation='lax') + self.assertIsNone(data) + self.assertEqual(len(errors), 1) + def test_json_dump_and_load(self): vh_xml_tree = ElementTree.parse(self.vh_xml_file) col_xml_tree = ElementTree.parse(self.col_xml_file) diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 2225df0..4b42ceb 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -11,6 +11,7 @@ from __future__ import unicode_literals from elementpath import datatypes, XPath2Parser, XPathContext, ElementPathError +from ..etree import ElementTree from ..qnames import XSD_ASSERT from ..xpath import ElementPathMixin, XMLSchemaProxy diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 322b458..89723b6 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -554,17 +554,6 @@ class XsdComplexType(XsdType, ValidationMixin): def get_facet(*_args, **_kwargs): return None - def get_instance_type(self, attrs, namespaces): - if XSI_TYPE in self.attributes: - self.attributes[XSI_TYPE].validate(attrs[XSI_TYPE]) - - type_qname = qname_to_extended(attrs[XSI_TYPE], namespaces) - xsi_type = self.maps.lookup_type(type_qname) - if not xsi_type.is_derived(self): - raise XMLSchemaTypeError("%r is not a derived type of %r" % (xsi_type, self)) - - return xsi_type - def admit_simple_restriction(self): if 'restriction' in self.final: return False diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 49bd354..589fc9d 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -18,13 +18,14 @@ from 
elementpath import XPath2Parser, ElementPathError, XPathContext from elementpath.datatypes import AbstractDateTime, Duration from ..exceptions import XMLSchemaAttributeError -from ..qnames import XSD_ANNOTATION, XSD_GROUP, \ - XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, \ - XSD_SIMPLE_TYPE, XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, \ +from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ + XSD_CHOICE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, \ + XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, \ XSD_KEY, XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ID, XSD_ERROR from ..helpers import get_qname, get_xsd_derivation_attribute, \ get_xsd_form_attribute, ParticleCounter from ..etree import etree_element +from ..helpers import strictly_equal from ..converters import ElementData, raw_xml_encode, XMLSchemaConverter from ..xpath import XMLSchemaProxy, ElementPathMixin @@ -467,8 +468,9 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) # Get the instance effective type xsd_type = self.get_type(elem, inherited) if XSI_TYPE in elem.attrib: + type_name = elem.attrib[XSI_TYPE].strip() try: - xsd_type = xsd_type.get_instance_type(elem.attrib, converter) + xsd_type = self.maps.get_instance_type(type_name, xsd_type, converter) except (KeyError, TypeError) as err: yield self.validation_error(validation, err, elem, **kwargs) @@ -531,7 +533,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) text = self.fixed elif text == self.fixed or validation == 'skip': pass - elif xsd_type.text_decode(text) != xsd_type.text_decode(self.fixed): + elif not strictly_equal(xsd_type.text_decode(text), xsd_type.text_decode(self.fixed)): reason = "must has the fixed value %r." 
% self.fixed yield self.validation_error(validation, reason, elem, **kwargs) @@ -539,15 +541,15 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) text = self.default if xsd_type.is_complex(): + for assertion in xsd_type.assertions: + for error in assertion(elem, value=text, **kwargs): + yield self.validation_error(validation, error, **kwargs) + if text and xsd_type.content_type.is_list(): value = text.split() else: value = text - for assertion in xsd_type.assertions: - for error in assertion(elem, value=value, **kwargs): - yield self.validation_error(validation, error, **kwargs) - xsd_type = xsd_type.content_type if text is None: @@ -559,6 +561,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) else: if level == 0 or self.xsd_version != '1.0': kwargs['_skip_id'] = True + for result in xsd_type.iter_decode(text, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) @@ -616,8 +619,9 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) xsd_type = self.get_type(element_data) if XSI_TYPE in element_data.attributes: + type_name = element_data.attributes[XSI_TYPE].strip() try: - xsd_type = xsd_type.get_instance_type(element_data.attributes, converter) + xsd_type = self.maps.get_instance_type(type_name, xsd_type, converter) except (KeyError, TypeError) as err: errors.append(err) @@ -1080,5 +1084,5 @@ class XsdAlternative(XsdComponent): def test(self, elem): try: return self.token.boolean_value(list(self.token.select(context=XPathContext(elem)))) - except TypeError: + except (TypeError, ValueError): return False diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index 6aecf28..7e56574 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -42,8 +42,11 @@ class XsdFacet(XsdComponent): return '%s(value=%r, fixed=%r)' % 
(self.__class__.__name__, self.value, self.fixed) def __call__(self, value): - for error in self.validator(value): - yield error + try: + for error in self.validator(value): + yield error + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) def _parse(self): super(XsdFacet, self)._parse() @@ -290,9 +293,13 @@ class XsdMinInclusiveFacet(XsdFacet): if facet is not None and facet.value < self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x < self.value: - yield XMLSchemaValidationError(self, x, "value has to be greater or equal than %r." % self.value) + def __call__(self, value): + try: + if value < self.value: + reason = "value has to be greater or equal than %r." % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdMinExclusiveFacet(XsdFacet): @@ -328,9 +335,13 @@ class XsdMinExclusiveFacet(XsdFacet): if facet is not None and facet.value <= self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x <= self.value: - yield XMLSchemaValidationError(self, x, "value has to be greater than %r." % self.value) + def __call__(self, value): + try: + if value <= self.value: + reason = "value has to be greater than %r." % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdMaxInclusiveFacet(XsdFacet): @@ -366,9 +377,13 @@ class XsdMaxInclusiveFacet(XsdFacet): if facet is not None and facet.value < self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x > self.value: - yield XMLSchemaValidationError(self, x, "value has to be lesser or equal than %r." 
% self.value) + def __call__(self, value): + try: + if value > self.value: + reason = "value has to be lesser or equal than %r." % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdMaxExclusiveFacet(XsdFacet): @@ -404,9 +419,13 @@ class XsdMaxExclusiveFacet(XsdFacet): if facet is not None and facet.value < self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x >= self.value: - yield XMLSchemaValidationError(self, x, "value has to be lesser than %r" % self.value) + def __call__(self, value): + try: + if value >= self.value: + reason = "value has to be lesser than %r" % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdTotalDigitsFacet(XsdFacet): @@ -634,9 +653,12 @@ class XsdPatternFacets(MutableSequence, XsdFacet): return '%s(%s...\'])' % (self.__class__.__name__, s[:70]) def __call__(self, text): - if all(pattern.match(text) is None for pattern in self.patterns): - msg = "value doesn't match any pattern of %r." - yield XMLSchemaValidationError(self, text, reason=msg % self.regexps) + try: + if all(pattern.match(text) is None for pattern in self.patterns): + msg = "value doesn't match any pattern of %r." 
+ yield XMLSchemaValidationError(self, text, reason=msg % self.regexps) + except TypeError as err: + yield XMLSchemaValidationError(self, text, unicode_type(err)) @property def regexps(self): @@ -646,9 +668,11 @@ class XsdPatternFacets(MutableSequence, XsdFacet): class XsdAssertionXPathParser(XPath2Parser): """Parser for XSD 1.1 assertion facets.""" + XsdAssertionXPathParser.unregister('last') XsdAssertionXPathParser.unregister('position') + @XsdAssertionXPathParser.method(XsdAssertionXPathParser.function('last', nargs=0)) def evaluate(self, context=None): self.missing_context("Context item size is undefined") diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py index 3b39f74..1e2a9ee 100644 --- a/xmlschema/validators/globals_.py +++ b/xmlschema/validators/globals_.py @@ -19,8 +19,8 @@ from ..compat import string_base_type from ..exceptions import XMLSchemaKeyError, XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaWarning from ..namespaces import XSD_NAMESPACE from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, XSD_SIMPLE_TYPE, \ - XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ELEMENT -from ..helpers import get_qname, local_name + XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ELEMENT, XSI_TYPE +from ..helpers import get_qname, local_name, qname_to_extended from ..namespaces import NamespaceResourcesMap from . import XMLSchemaNotBuiltError, XMLSchemaModelError, XMLSchemaModelDepthError, \ @@ -125,8 +125,8 @@ def create_lookup_function(xsd_classes): obj = global_map[qname] except KeyError: if '{' in qname: - raise XMLSchemaKeyError("missing a %s component for %r!" % (types_desc, qname)) - raise XMLSchemaKeyError("missing a %s component for %r! As the name has no namespace " + raise XMLSchemaKeyError("missing an %s component for %r!" % (types_desc, qname)) + raise XMLSchemaKeyError("missing an %s component for %r! 
As the name has no namespace " "maybe a missing default namespace declaration." % (types_desc, qname)) else: if isinstance(obj, xsd_classes): @@ -281,6 +281,23 @@ class XsdGlobals(XsdValidator): else: raise XMLSchemaValueError("wrong tag {!r} for an XSD global definition/declaration".format(tag)) + def get_instance_type(self, type_name, base_type, namespaces): + """ + Returns the instance XSI type from global maps, validating it with the reference base type. + + :param type_name: the XSI type attribute value, a QName in prefixed format. + :param base_type: the XSD from which the instance type has to be derived. + :param namespaces: a map from prefixes to namespaces. + """ + if base_type.is_complex() and XSI_TYPE in base_type.attributes: + base_type.attributes[XSI_TYPE].validate(type_name) + + extended_name = qname_to_extended(type_name, namespaces) + xsi_type = lookup_type(extended_name, self.types, self.validator.BUILDERS_MAP) + if not xsi_type.is_derived(base_type): + raise XMLSchemaTypeError("%r is not a derived type of %r" % (xsi_type, self)) + return xsi_type + @property def built(self): return all(schema.built for schema in self.iter_schemas()) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 10a74db..55638de 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -482,6 +482,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): return other_max_occurs >= max_occurs * self.max_occurs def check_dynamic_context(self, elem, xsd_element, model_element, converter): + alternatives = () if isinstance(xsd_element, XsdAnyElement): if xsd_element.process_contents == 'skip': return @@ -489,17 +490,20 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): try: xsd_element = self.maps.lookup_element(elem.tag) except LookupError: - alternatives = () try: - xsd_type = self.any_type.get_instance_type(elem.attrib, converter) + type_name = elem.attrib[XSI_TYPE].strip() except KeyError: return + 
else: + xsd_type = self.maps.get_instance_type(type_name, self.any_type, converter) else: alternatives = xsd_element.alternatives try: - xsd_type = xsd_element.type.get_instance_type(elem.attrib, converter) + type_name = elem.attrib[XSI_TYPE].strip() except KeyError: xsd_type = xsd_element.type + else: + xsd_type = self.maps.get_instance_type(type_name, xsd_element.type, converter) else: if XSI_TYPE not in elem.attrib: @@ -507,9 +511,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: alternatives = xsd_element.alternatives try: - xsd_type = xsd_element.type.get_instance_type(elem.attrib, converter) + type_name = elem.attrib[XSI_TYPE].strip() except KeyError: xsd_type = xsd_element.type + else: + xsd_type = self.maps.get_instance_type(type_name, xsd_element.type, converter) if model_element is not xsd_element and model_element.block: for derivation in model_element.block.split(): diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index c4e5866..5da8a33 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -23,8 +23,8 @@ from ..qnames import XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, \ XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, \ XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, \ XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, \ - XSD_ERROR, XSD_ASSERT, XSI_TYPE -from ..helpers import get_qname, local_name, qname_to_extended, get_xsd_derivation_attribute + XSD_ERROR, XSD_ASSERT +from ..helpers import get_qname, local_name, get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, \ XMLSchemaDecodeError, XMLSchemaParseError @@ -361,7 +361,7 @@ class XsdSimpleType(XsdType, ValidationMixin): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) - if validation != 'skip': + if validation != 'skip' and obj is not 
None: if self.patterns is not None: for error in self.patterns(obj): yield error @@ -377,7 +377,7 @@ class XsdSimpleType(XsdType, ValidationMixin): elif validation != 'skip': yield self.encode_error(validation, obj, unicode_type) - if validation != 'skip': + if validation != 'skip' and obj is not None: if self.patterns is not None: for error in self.patterns(obj): yield error @@ -391,13 +391,6 @@ class XsdSimpleType(XsdType, ValidationMixin): def get_facet(self, tag): return self.facets.get(tag) - def get_instance_type(self, attrs, namespaces): - type_qname = qname_to_extended(attrs[XSI_TYPE], namespaces) - xsi_type = self.maps.lookup_type(type_qname) - if not xsi_type.is_derived(self): - raise XMLSchemaValueError("%r is not a derived type of %r" % (xsi_type, self)) - return xsi_type - # # simpleType's derived classes: @@ -1137,7 +1130,7 @@ class XsdAtomicRestriction(XsdAtomic): if isinstance(result, XMLSchemaDecodeError): yield unicode_type(obj) if validation == 'skip' else None else: - if validation != 'skip': + if validation != 'skip' and result is not None: for validator in self.validators: for error in validator(result): yield error @@ -1150,7 +1143,7 @@ class XsdAtomicRestriction(XsdAtomic): if not hasattr(obj, '__iter__') or isinstance(obj, (str, unicode_type, bytes)): obj = [] if obj is None or obj == '' else [obj] - if validation != 'skip': + if validation != 'skip' and obj is not None: for validator in self.validators: for error in validator(obj): yield error @@ -1186,7 +1179,7 @@ class XsdAtomicRestriction(XsdAtomic): yield unicode_type(obj) if validation == 'skip' else None return else: - if validation != 'skip': + if validation != 'skip' and obj is not None: for validator in self.validators: for error in validator(obj): yield error From b7b6fef418fa5a0e87b4a7c0a731c5849ffba599 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Mon, 7 Oct 2019 15:31:18 +0200 Subject: [PATCH 23/36] Base modules refactoring for fix ElementTree import --- 
xmlschema/__init__.py | 1 + xmlschema/converters.py | 4 +- xmlschema/etree.py | 29 +----- xmlschema/helpers.py | 107 +------------------- xmlschema/namespaces.py | 12 ++- xmlschema/qnames.py | 99 +++++++++++++++++- xmlschema/resources.py | 8 +- xmlschema/tests/__init__.py | 10 +- xmlschema/tests/test_helpers.py | 6 +- xmlschema/tests/test_resources.py | 3 +- xmlschema/tests/validation/test_encoding.py | 5 +- xmlschema/validators/assertions.py | 12 ++- xmlschema/validators/attributes.py | 5 +- xmlschema/validators/builtins.py | 17 +++- xmlschema/validators/complex_types.py | 12 +-- xmlschema/validators/elements.py | 9 +- xmlschema/validators/exceptions.py | 5 +- xmlschema/validators/globals_.py | 9 +- xmlschema/validators/groups.py | 3 +- xmlschema/validators/identities.py | 11 +- xmlschema/validators/notations.py | 5 +- xmlschema/validators/schema.py | 1 + xmlschema/validators/simple_types.py | 4 +- xmlschema/validators/wildcards.py | 4 +- xmlschema/validators/xsdbase.py | 7 +- xmlschema/xpath.py | 3 + 26 files changed, 201 insertions(+), 190 deletions(-) diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index 80685c0..e9adc67 100644 --- a/xmlschema/__init__.py +++ b/xmlschema/__init__.py @@ -10,6 +10,7 @@ # from .exceptions import XMLSchemaException, XMLSchemaRegexError, XMLSchemaURLError, \ XMLSchemaNamespaceError +from .etree import etree_tostring from .resources import ( normalize_url, fetch_resource, load_xml_resource, fetch_namespaces, fetch_schema_locations, fetch_schema, XMLResource diff --git a/xmlschema/converters.py b/xmlschema/converters.py index d7deaa3..3570d47 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -18,9 +18,9 @@ import warnings from .compat import ordered_dict_class, unicode_type from .exceptions import XMLSchemaValueError -from .etree import etree_element, lxml_etree_element, etree_register_namespace, lxml_etree_register_namespace from .namespaces import XSI_NAMESPACE -from .helpers import local_name +from 
.qnames import local_name +from .etree import etree_element, lxml_etree_element, etree_register_namespace, lxml_etree_register_namespace from xmlschema.namespaces import NamespaceMapper ElementData = namedtuple('ElementData', ['tag', 'text', 'content', 'attributes']) diff --git a/xmlschema/etree.py b/xmlschema/etree.py index 9d75f22..d79d7da 100644 --- a/xmlschema/etree.py +++ b/xmlschema/etree.py @@ -13,8 +13,8 @@ This module contains ElementTree setup and helpers for xmlschema package. """ from __future__ import unicode_literals import sys -import re import importlib +import re from collections import Counter try: @@ -23,10 +23,9 @@ except ImportError: lxml_etree = None from .compat import PY3 -from .exceptions import XMLSchemaValueError, XMLSchemaTypeError -from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE -from .helpers import get_namespace, get_qname, qname_to_prefixed -from .xpath import ElementPathMixin +from .exceptions import XMLSchemaTypeError, XMLSchemaValueError +from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE, get_namespace +from .qnames import get_qname, qname_to_prefixed ### # Programmatic import of xml.etree.ElementTree @@ -130,11 +129,6 @@ class SafeXMLParser(PyElementTree.XMLParser): ) -def is_etree_element(elem): - """More safer test for matching ElementTree elements.""" - return hasattr(elem, 'tag') and hasattr(elem, 'attrib') and not isinstance(elem, ElementPathMixin) - - def etree_tostring(elem, namespaces=None, indent='', max_lines=None, spaces_for_tab=4, xml_declaration=False): """ Serialize an Element tree to a string. Tab characters are replaced by whitespaces. 
@@ -267,21 +261,6 @@ def etree_getpath(elem, root, namespaces=None, relative=True, add_position=False return path -def etree_last_child(elem): - """Returns the last child of the element, ignoring children that are lxml comments.""" - for child in reversed(elem): - if not callable(child.tag): - return child - - -def etree_child_index(elem, child): - """Return the index or raise ValueError if it is not a *child* of *elem*.""" - for index in range(len(elem)): - if elem[index] is child: - return index - raise XMLSchemaValueError("%r is not a child of %r" % (child, elem)) - - def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True): """ Tests the equality of two XML Element trees. diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index d2df751..8a77e86 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -11,116 +11,19 @@ """ This module contains various helper functions and classes. """ -import re from decimal import Decimal from .compat import string_base_type -from .exceptions import XMLSchemaValueError, XMLSchemaTypeError +from .exceptions import XMLSchemaValueError from .qnames import XSD_ANNOTATION +from .xpath import ElementPathMixin XSD_FINAL_ATTRIBUTE_VALUES = {'restriction', 'extension', 'list', 'union'} -NAMESPACE_PATTERN = re.compile(r'{([^}]*)}') -def get_namespace(name): - try: - return NAMESPACE_PATTERN.match(name).group(1) - except (AttributeError, TypeError): - return '' - - -def get_qname(uri, name): - """ - Returns an expanded QName from URI and local part. If any argument has boolean value - `False` or if the name is already an expanded QName, returns the *name* argument. - - :param uri: namespace URI - :param name: local or qualified name - :return: string or the name argument - """ - if not uri or not name or name[0] in ('{', '.', '/', '['): - return name - else: - return '{%s}%s' % (uri, name) - - -def local_name(qname): - """ - Return the local part of an expanded QName or a prefixed name. 
If the name - is `None` or empty returns the *name* argument. - - :param qname: an expanded QName or a prefixed name or a local name. - """ - try: - if qname[0] == '{': - _, qname = qname.split('}') - elif ':' in qname: - _, qname = qname.split(':') - except IndexError: - return '' - except ValueError: - raise XMLSchemaValueError("the argument 'qname' has a wrong format: %r" % qname) - except TypeError: - if qname is None: - return qname - raise XMLSchemaTypeError("the argument 'qname' must be a string-like object or None") - else: - return qname - - -def qname_to_prefixed(qname, namespaces): - """ - Transforms a fully qualified name into a prefixed name using a namespace map. - Returns the *qname* argument if it's not a fully qualified name or if it has - boolean value `False`. - - :param qname: an extended QName or a local name. - :param namespaces: a map from prefixes to namespace URIs. - :return: a QName in prefixed format or a local name. - """ - if not qname: - return qname - - namespace = get_namespace(qname) - for prefix, uri in sorted(filter(lambda x: x[1] == namespace, namespaces.items()), reverse=True): - if not uri: - return '%s:%s' % (prefix, qname) if prefix else qname - elif prefix: - return qname.replace('{%s}' % uri, '%s:' % prefix) - else: - return qname.replace('{%s}' % uri, '') - else: - return qname - - -def qname_to_extended(qname, namespaces): - """ - Converts a QName in prefixed format or a local name to the extended QName format. - - :param qname: a QName in prefixed format or a local name. - :param namespaces: a map from prefixes to namespace URIs. - :return: a QName in extended format or a local name. 
- """ - try: - if qname[0] == '{' or not namespaces: - return qname - except IndexError: - return qname - - try: - prefix, name = qname.split(':', 1) - except ValueError: - if not namespaces.get(''): - return qname - else: - return '{%s}%s' % (namespaces[''], qname) - else: - try: - uri = namespaces[prefix] - except KeyError: - return qname - else: - return u'{%s}%s' % (uri, name) if uri else name +def is_etree_element(elem): + """More safer test for matching ElementTree elements.""" + return hasattr(elem, 'tag') and hasattr(elem, 'attrib') and not isinstance(elem, ElementPathMixin) def get_xsd_annotation(elem): diff --git a/xmlschema/namespaces.py b/xmlschema/namespaces.py index 9be0986..beff6c6 100644 --- a/xmlschema/namespaces.py +++ b/xmlschema/namespaces.py @@ -12,9 +12,9 @@ This module contains namespace definitions for W3C core standards and namespace related classes. """ from __future__ import unicode_literals +import re from .compat import MutableMapping, Mapping -from .helpers import get_namespace XSD_NAMESPACE = 'http://www.w3.org/2001/XMLSchema' "URI of the XML Schema Definition namespace (xs|xsd)" @@ -42,6 +42,16 @@ VC_NAMESPACE = 'http://www.w3.org/2007/XMLSchema-versioning' "URI of the XML Schema Versioning namespace (vc)" +NAMESPACE_PATTERN = re.compile(r'{([^}]*)}') + + +def get_namespace(name): + try: + return NAMESPACE_PATTERN.match(name).group(1) + except (AttributeError, TypeError): + return '' + + class NamespaceResourcesMap(MutableMapping): """ Dictionary for storing information about namespace resources. The values are diff --git a/xmlschema/qnames.py b/xmlschema/qnames.py index 4ec4a12..ae5ec65 100644 --- a/xmlschema/qnames.py +++ b/xmlschema/qnames.py @@ -9,9 +9,11 @@ # @author Davide Brunato # """ -This module contains qualified names constants. +This module contains qualified names constants and helpers. 
""" from __future__ import unicode_literals +from .exceptions import XMLSchemaTypeError, XMLSchemaValueError +from .namespaces import get_namespace VC_TEMPLATE = '{http://www.w3.org/2007/XMLSchema-versioning}%s' XML_TEMPLATE = '{http://www.w3.org/XML/1998/namespace}%s' @@ -181,3 +183,98 @@ XSD_DATE_TIME_STAMP = XSD_TEMPLATE % 'dateTimeStamp' XSD_DAY_TIME_DURATION = XSD_TEMPLATE % 'dayTimeDuration' XSD_YEAR_MONTH_DURATION = XSD_TEMPLATE % 'yearMonthDuration' XSD_ERROR = XSD_TEMPLATE % 'error' + + + +def get_qname(uri, name): + """ + Returns an expanded QName from URI and local part. If any argument has boolean value + `False` or if the name is already an expanded QName, returns the *name* argument. + + :param uri: namespace URI + :param name: local or qualified name + :return: string or the name argument + """ + if not uri or not name or name[0] in ('{', '.', '/', '['): + return name + else: + return '{%s}%s' % (uri, name) + + +def local_name(qname): + """ + Return the local part of an expanded QName or a prefixed name. If the name + is `None` or empty returns the *name* argument. + + :param qname: an expanded QName or a prefixed name or a local name. + """ + try: + if qname[0] == '{': + _, qname = qname.split('}') + elif ':' in qname: + _, qname = qname.split(':') + except IndexError: + return '' + except ValueError: + raise XMLSchemaValueError("the argument 'qname' has a wrong format: %r" % qname) + except TypeError: + if qname is None: + return qname + raise XMLSchemaTypeError("the argument 'qname' must be a string-like object or None") + else: + return qname + + +def qname_to_prefixed(qname, namespaces): + """ + Transforms a fully qualified name into a prefixed name using a namespace map. + Returns the *qname* argument if it's not a fully qualified name or if it has + boolean value `False`. + + :param qname: an extended QName or a local name. + :param namespaces: a map from prefixes to namespace URIs. + :return: a QName in prefixed format or a local name. 
+ """ + if not qname: + return qname + + namespace = get_namespace(qname) + for prefix, uri in sorted(filter(lambda x: x[1] == namespace, namespaces.items()), reverse=True): + if not uri: + return '%s:%s' % (prefix, qname) if prefix else qname + elif prefix: + return qname.replace('{%s}' % uri, '%s:' % prefix) + else: + return qname.replace('{%s}' % uri, '') + else: + return qname + + +def qname_to_extended(qname, namespaces): + """ + Converts a QName in prefixed format or a local name to the extended QName format. + + :param qname: a QName in prefixed format or a local name. + :param namespaces: a map from prefixes to namespace URIs. + :return: a QName in extended format or a local name. + """ + try: + if qname[0] == '{' or not namespaces: + return qname + except IndexError: + return qname + + try: + prefix, name = qname.split(':', 1) + except ValueError: + if not namespaces.get(''): + return qname + else: + return '{%s}%s' % (namespaces[''], qname) + else: + try: + uri = namespaces[prefix] + except KeyError: + return qname + else: + return u'{%s}%s' % (uri, name) if uri else name diff --git a/xmlschema/resources.py b/xmlschema/resources.py index b2898f7..1cf37c1 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -18,9 +18,9 @@ from .compat import ( pathname2url, URLError, uses_relative ) from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaURLError, XMLSchemaOSError +from .namespaces import get_namespace from .qnames import XSI_SCHEMA_LOCATION, XSI_NONS_SCHEMA_LOCATION -from .helpers import get_namespace -from .etree import ElementTree, PyElementTree, SafeXMLParser, is_etree_element, etree_tostring +from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring DEFUSE_MODES = ('always', 'remote', 'never') @@ -285,7 +285,7 @@ class XMLResource(object): def _fromsource(self, source): url, lazy = None, self._lazy - if is_etree_element(source): + if hasattr(source, 'tag'): self._lazy = False return source, None, 
None, None # Source is already an Element --> nothing to load elif isinstance(source, string_base_type): @@ -344,7 +344,7 @@ class XMLResource(object): except (AttributeError, TypeError): pass else: - if is_etree_element(root): + if hasattr(root, 'tag'): self._lazy = False return root, source, None, None diff --git a/xmlschema/tests/__init__.py b/xmlschema/tests/__init__.py index 56296fb..9190c32 100644 --- a/xmlschema/tests/__init__.py +++ b/xmlschema/tests/__init__.py @@ -20,13 +20,11 @@ import xmlschema from xmlschema import XMLSchema from xmlschema.compat import urlopen, URLError, unicode_type from xmlschema.exceptions import XMLSchemaValueError -from xmlschema.etree import ( - is_etree_element, etree_element, etree_register_namespace, etree_elements_assert_equal -) -from xmlschema.resources import fetch_namespaces from xmlschema.qnames import XSD_SCHEMA -from xmlschema.helpers import get_namespace -from xmlschema.namespaces import XSD_NAMESPACE +from xmlschema.namespaces import XSD_NAMESPACE, get_namespace +from xmlschema.etree import etree_element, etree_register_namespace, etree_elements_assert_equal +from xmlschema.resources import fetch_namespaces +from xmlschema.helpers import is_etree_element def has_network_access(*locations): diff --git a/xmlschema/tests/test_helpers.py b/xmlschema/tests/test_helpers.py index 1e96cb1..be195ef 100644 --- a/xmlschema/tests/test_helpers.py +++ b/xmlschema/tests/test_helpers.py @@ -20,10 +20,10 @@ import xml.etree.ElementTree as ElementTree from xmlschema import XMLSchema, XMLSchemaParseError from xmlschema.etree import etree_element, prune_etree -from xmlschema.namespaces import XSD_NAMESPACE, XSI_NAMESPACE -from xmlschema.helpers import get_xsd_annotation, get_namespace, get_qname, local_name, \ - qname_to_prefixed, get_xsd_derivation_attribute, count_digits +from xmlschema.namespaces import XSD_NAMESPACE, XSI_NAMESPACE, get_namespace from xmlschema.qnames import XSI_TYPE, XSD_SCHEMA, XSD_ELEMENT, XSD_SIMPLE_TYPE, 
XSD_ANNOTATION +from xmlschema.qnames import get_qname, local_name, qname_to_prefixed +from xmlschema.helpers import get_xsd_annotation, get_xsd_derivation_attribute, count_digits class TestHelpers(unittest.TestCase): diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py index eebf8c6..c983e08 100644 --- a/xmlschema/tests/test_resources.py +++ b/xmlschema/tests/test_resources.py @@ -26,8 +26,9 @@ from xmlschema import ( ) from xmlschema.tests import casepath from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO -from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, is_etree_element, \ +from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, \ etree_element, py_etree_element +from xmlschema.helpers import is_etree_element def is_windows_path(path): diff --git a/xmlschema/tests/validation/test_encoding.py b/xmlschema/tests/validation/test_encoding.py index ffcd3f5..30a90d5 100644 --- a/xmlschema/tests/validation/test_encoding.py +++ b/xmlschema/tests/validation/test_encoding.py @@ -15,9 +15,10 @@ import unittest from xmlschema import XMLSchemaEncodeError, XMLSchemaValidationError from xmlschema.converters import UnorderedConverter from xmlschema.compat import unicode_type, ordered_dict_class -from xmlschema.etree import etree_element, etree_tostring, is_etree_element, ElementTree +from xmlschema.qnames import local_name +from xmlschema.etree import etree_element, etree_tostring, ElementTree from xmlschema.validators.exceptions import XMLSchemaChildrenValidationError -from xmlschema.helpers import local_name +from xmlschema.helpers import is_etree_element from xmlschema.tests import XsdValidatorTestCase from xmlschema.validators import XMLSchema11 diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index 4b42ceb..c2ddca5 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -11,7 +11,6 @@ from __future__ import 
unicode_literals from elementpath import datatypes, XPath2Parser, XPathContext, ElementPathError -from ..etree import ElementTree from ..qnames import XSD_ASSERT from ..xpath import ElementPathMixin, XMLSchemaProxy @@ -49,7 +48,7 @@ class XsdAssert(XsdComponent, ElementPathMixin): self.parse_error("base_type=%r is not a complexType definition" % self.base_type) else: try: - self.path = self.elem.attrib['test'] + self.path = self.elem.attrib['test'].strip() except KeyError as err: self.parse_error(str(err), elem=self.elem) @@ -87,7 +86,7 @@ class XsdAssert(XsdComponent, ElementPathMixin): self.parse_error(err, elem=self.elem) self.token = self.parser.parse('true()') - def __call__(self, elem, value=None, source=None, **kwargs): + def __call__(self, elem, value=None, source=None, namespaces=None, **kwargs): if value is not None: self.parser.variables['value'] = self.base_type.text_decode(value) @@ -96,6 +95,11 @@ class XsdAssert(XsdComponent, ElementPathMixin): else: context = XPathContext(root=source.root, item=elem) + default_namespace = self.parser.namespaces[''] + + if namespaces and '' in namespaces: + self.parser.namespaces[''] = namespaces[''] + try: if not self.token.evaluate(context.copy()): msg = "expression is not true with test path %r." 
@@ -103,6 +107,8 @@ class XsdAssert(XsdComponent, ElementPathMixin): except ElementPathError as err: yield XMLSchemaValidationError(self, obj=elem, reason=str(err)) + self.parser.namespaces[''] = default_namespace + # For implementing ElementPathMixin def __iter__(self): if not self.parent.has_simple_content(): diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index f2eb2b3..78df62d 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -19,8 +19,9 @@ from ..compat import MutableMapping, ordered_dict_class from ..exceptions import XMLSchemaAttributeError, XMLSchemaTypeError, XMLSchemaValueError from ..qnames import XSD_ANNOTATION, XSD_ANY_SIMPLE_TYPE, XSD_SIMPLE_TYPE, \ XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_RESTRICTION, XSD_EXTENSION, \ - XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE -from ..helpers import get_namespace, get_qname, get_xsd_form_attribute + XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE, \ + get_namespace, get_qname +from ..helpers import get_xsd_form_attribute from ..namespaces import XSI_NAMESPACE from .exceptions import XMLSchemaValidationError diff --git a/xmlschema/validators/builtins.py b/xmlschema/validators/builtins.py index 43fd7f0..682e879 100644 --- a/xmlschema/validators/builtins.py +++ b/xmlschema/validators/builtins.py @@ -25,8 +25,21 @@ from elementpath import datatypes from ..compat import PY3, long_type, unicode_type from ..exceptions import XMLSchemaValueError -from ..qnames import * -from ..etree import etree_element, is_etree_element +from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ + XSD_PATTERN, XSD_WHITE_SPACE, XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, \ + XSD_MAX_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_EXPLICIT_TIMEZONE, \ + XSD_STRING, XSD_NORMALIZED_STRING, XSD_NAME, XSD_NCNAME, XSD_QNAME, XSD_TOKEN, \ + XSD_NMTOKEN, XSD_ID, XSD_IDREF, 
XSD_LANGUAGE, XSD_DECIMAL, XSD_DOUBLE, XSD_FLOAT, \ + XSD_INTEGER, XSD_BYTE, XSD_SHORT, XSD_INT, XSD_LONG, XSD_UNSIGNED_BYTE, \ + XSD_UNSIGNED_SHORT, XSD_UNSIGNED_INT, XSD_UNSIGNED_LONG, XSD_POSITIVE_INTEGER, \ + XSD_NEGATIVE_INTEGER, XSD_NON_NEGATIVE_INTEGER, XSD_NON_POSITIVE_INTEGER, \ + XSD_GDAY, XSD_GMONTH, XSD_GMONTH_DAY, XSD_GYEAR, XSD_GYEAR_MONTH, XSD_TIME, XSD_DATE, \ + XSD_DATETIME, XSD_DATE_TIME_STAMP, XSD_ENTITY, XSD_ANY_URI, XSD_BOOLEAN, \ + XSD_DURATION, XSD_DAY_TIME_DURATION, XSD_YEAR_MONTH_DURATION, XSD_BASE64_BINARY, \ + XSD_HEX_BINARY, XSD_NOTATION_TYPE, XSD_ERROR, XSD_ASSERTION, XSD_SIMPLE_TYPE, \ + XSD_COMPLEX_TYPE, XSD_ANY_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ANY_SIMPLE_TYPE +from ..etree import etree_element +from ..helpers import is_etree_element from .exceptions import XMLSchemaValidationError from .facets import XSD_10_FACETS_BUILDERS, XSD_11_FACETS_BUILDERS from .simple_types import XsdSimpleType, XsdAtomicBuiltin diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 89723b6..1a7fe2b 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -10,12 +10,12 @@ # from __future__ import unicode_literals -from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError -from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENCE, XSD_ALL, \ - XSD_CHOICE, XSD_ANY_ATTRIBUTE, XSD_ATTRIBUTE, XSD_COMPLEX_CONTENT, XSD_RESTRICTION, \ - XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, \ - XSD_OPEN_CONTENT, XSD_ASSERT, XSI_TYPE -from ..helpers import get_qname, local_name, qname_to_extended, get_xsd_derivation_attribute +from ..exceptions import XMLSchemaValueError +from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENCE, \ + XSD_ALL, XSD_CHOICE, XSD_ANY_ATTRIBUTE, XSD_ATTRIBUTE, XSD_COMPLEX_CONTENT, \ + XSD_RESTRICTION, XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, \ + 
XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, get_qname, local_name +from ..helpers import get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError from .xsdbase import XsdType, ValidationMixin diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 589fc9d..d849806 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -20,12 +20,11 @@ from elementpath.datatypes import AbstractDateTime, Duration from ..exceptions import XMLSchemaAttributeError from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ XSD_CHOICE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, \ - XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, \ - XSD_KEY, XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ID, XSD_ERROR -from ..helpers import get_qname, get_xsd_derivation_attribute, \ - get_xsd_form_attribute, ParticleCounter + XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, XSD_KEY, \ + XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ID, XSD_ERROR, get_qname from ..etree import etree_element -from ..helpers import strictly_equal +from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute, \ + ParticleCounter, strictly_equal from ..converters import ElementData, raw_xml_encode, XMLSchemaConverter from ..xpath import XMLSchemaProxy, ElementPathMixin diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index fdb1836..e47a1ec 100644 --- a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -15,8 +15,9 @@ from __future__ import unicode_literals from ..compat import PY3, string_base_type from ..exceptions import XMLSchemaException, XMLSchemaWarning, XMLSchemaValueError -from ..etree import etree_tostring, is_etree_element, etree_getpath -from ..helpers import qname_to_prefixed +from ..qnames import qname_to_prefixed +from ..etree import etree_tostring, etree_getpath +from ..helpers import is_etree_element 
from ..resources import XMLResource diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py index 1e2a9ee..d610324 100644 --- a/xmlschema/validators/globals_.py +++ b/xmlschema/validators/globals_.py @@ -17,11 +17,10 @@ from collections import Counter from ..compat import string_base_type from ..exceptions import XMLSchemaKeyError, XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaWarning -from ..namespaces import XSD_NAMESPACE -from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, XSD_SIMPLE_TYPE, \ - XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ELEMENT, XSI_TYPE -from ..helpers import get_qname, local_name, qname_to_extended -from ..namespaces import NamespaceResourcesMap +from ..namespaces import XSD_NAMESPACE, NamespaceResourcesMap +from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, \ + XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, \ + XSD_ELEMENT, XSI_TYPE, get_qname, local_name, qname_to_extended from . 
import XMLSchemaNotBuiltError, XMLSchemaModelError, XMLSchemaModelDepthError, \ XsdValidator, XsdComponent, XsdAttribute, XsdSimpleType, XsdComplexType, XsdElement, \ diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 55638de..ed27409 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -18,8 +18,7 @@ from ..compat import unicode_type from ..exceptions import XMLSchemaValueError from ..etree import etree_element from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ - XSD_CHOICE, XSD_ELEMENT, XSD_ANY, XSI_TYPE -from xmlschema.helpers import get_qname, local_name + XSD_CHOICE, XSD_ELEMENT, XSD_ANY, XSI_TYPE, get_qname, local_name from .exceptions import XMLSchemaValidationError, XMLSchemaChildrenValidationError, \ XMLSchemaTypeTableWarning diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 8623c4b..58d2d0e 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -17,8 +17,8 @@ from collections import Counter from elementpath import Selector, XPath1Parser, ElementPathError from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_ANNOTATION, XSD_QNAME, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSD_SELECTOR, XSD_FIELD -from ..helpers import get_qname, qname_to_prefixed, qname_to_extended +from ..qnames import XSD_ANNOTATION, XSD_QNAME, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, \ + XSD_SELECTOR, XSD_FIELD, get_qname, qname_to_prefixed, qname_to_extended from ..etree import etree_getpath from ..regex import get_python_regex @@ -152,9 +152,10 @@ class XsdIdentity(XsdComponent): """ Get fields for a schema or instance context element. - :param context: Context Element or XsdElement - :param decoders: Context schema fields decoders. - :return: A tuple with field values. An empty field is replaced by `None`. 
+ :param context: context Element or XsdElement + :param namespaces: is an optional mapping from namespace prefix to URI. + :param decoders: context schema fields decoders. + :return: a tuple with field values. An empty field is replaced by `None`. """ fields = [] for k, field in enumerate(self.fields): diff --git a/xmlschema/validators/notations.py b/xmlschema/validators/notations.py index 6a79980..05efe52 100644 --- a/xmlschema/validators/notations.py +++ b/xmlschema/validators/notations.py @@ -10,10 +10,7 @@ # from __future__ import unicode_literals -from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_NOTATION -from ..helpers import get_qname - +from ..qnames import XSD_NOTATION, get_qname from .xsdbase import XsdComponent diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index cc3a6f1..e18d2af 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -938,6 +938,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): import_error = None for url in locations: try: + # print("Import namespace ", namespace, url) self.import_schema(namespace, url, self.base_url) except (OSError, IOError) as err: # It's not an error if the location access fails (ref. 
section 4.2.6.2): diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 5da8a33..2e9fd63 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -23,8 +23,8 @@ from ..qnames import XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, \ XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, \ XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, \ XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, \ - XSD_ERROR, XSD_ASSERT -from ..helpers import get_qname, local_name, get_xsd_derivation_attribute + XSD_ERROR, XSD_ASSERT, get_qname, local_name +from ..helpers import get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, \ XMLSchemaDecodeError, XMLSchemaParseError diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 7f818f6..cb4c8d7 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -14,9 +14,9 @@ This module contains classes for XML Schema wildcards. 
from __future__ import unicode_literals from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_OPEN_CONTENT, XSD_DEFAULT_OPEN_CONTENT -from ..helpers import get_namespace from ..namespaces import XSI_NAMESPACE +from ..qnames import XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_OPEN_CONTENT, \ + XSD_DEFAULT_OPEN_CONTENT, get_namespace from ..xpath import XMLSchemaProxy, ElementPathMixin from .exceptions import XMLSchemaNotBuiltError diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index a745772..cf450ce 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -17,9 +17,10 @@ import re from ..compat import PY3, string_base_type, unicode_type from ..exceptions import XMLSchemaValueError, XMLSchemaTypeError from ..qnames import XSD_ANNOTATION, XSD_APPINFO, XSD_DOCUMENTATION, XML_LANG, \ - XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ID, XSD_OVERRIDE -from ..helpers import get_qname, local_name, qname_to_prefixed -from ..etree import etree_tostring, is_etree_element + XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ID, XSD_OVERRIDE, \ + get_qname, local_name, qname_to_prefixed +from ..etree import etree_tostring +from ..helpers import is_etree_element from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, \ XMLSchemaDecodeError, XMLSchemaEncodeError diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index e87159c..1c48722 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -120,6 +120,9 @@ class XMLSchemaProxy(AbstractSchemaProxy): except KeyError: return None + def find(self, path, namespaces=None): + return self._schema.find(path, namespaces) + def is_instance(self, obj, type_qname): xsd_type = self._schema.maps.types[type_qname] try: From 433970cf724a2321f2371256b5f1ee6a0094530f Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Mon, 7 Oct 2019 22:18:08 +0200 Subject: [PATCH 24/36] Add FALLBACK_LOCATIONS to schema class - 
XLink namespace removed from base schemas - Fallback locations dictionary added for XLink and XHTML namespaces - Fix for issue #137 (local mode) --- xmlschema/validators/schema.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index e18d2af..4903a58 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -18,6 +18,7 @@ the standard. import os from collections import namedtuple, Counter from abc import ABCMeta +import logging import warnings import re @@ -71,10 +72,12 @@ ANY_ELEMENT = etree_element( 'maxOccurs': 'unbounded' }) +# XSD schemas of W3C standards SCHEMAS_DIR = os.path.join(os.path.dirname(__file__), 'schemas/') XML_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xml_minimal.xsd') XSI_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-instance_minimal.xsd') XLINK_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xlink.xsd') +XHTML_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xhtml1-strict.xsd') VC_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-versioning_minimal.xsd') @@ -180,6 +183,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :vartype BUILDERS_MAP: dict :cvar BASE_SCHEMAS: a dictionary from namespace to schema resource for meta-schema bases. :vartype BASE_SCHEMAS: dict + :cvar FALLBACK_LOCATIONS: fallback schema location hints for other standard namespaces. + :vartype FALLBACK_LOCATIONS: dict :cvar meta_schema: the XSD meta-schema instance. :vartype meta_schema: XMLSchema :cvar attribute_form_default: the schema's *attributeFormDefault* attribute, defaults to 'unqualified'. 
@@ -237,6 +242,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): BUILDERS = None BUILDERS_MAP = None BASE_SCHEMAS = None + FALLBACK_LOCATIONS = None meta_schema = None # Schema defaults @@ -318,8 +324,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): # Set locations hints self.locations = NamespaceResourcesMap(self.source.get_locations(locations)) if self.meta_schema is not None: - # Add fallback schema location hint for XHTML - self.locations[XHTML_NAMESPACE] = os.path.join(SCHEMAS_DIR, 'xhtml1-strict.xsd') + self.locations.update(self.FALLBACK_LOCATIONS) self.converter = self.get_converter(converter) self.xpath_proxy = XMLSchemaProxy(self) @@ -938,7 +943,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): import_error = None for url in locations: try: - # print("Import namespace ", namespace, url) self.import_schema(namespace, url, self.base_url) except (OSError, IOError) as err: # It's not an error if the location access fails (ref. 
section 4.2.6.2): @@ -1425,7 +1429,10 @@ class XMLSchema10(XMLSchemaBase): BASE_SCHEMAS = { XML_NAMESPACE: XML_SCHEMA_FILE, XSI_NAMESPACE: XSI_SCHEMA_FILE, + } + FALLBACK_LOCATIONS = { XLINK_NAMESPACE: XLINK_SCHEMA_FILE, + XHTML_NAMESPACE: XHTML_SCHEMA_FILE, } @@ -1486,9 +1493,12 @@ class XMLSchema11(XMLSchemaBase): XSD_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XSD_1.1/xsd11-extra.xsd'), XML_NAMESPACE: XML_SCHEMA_FILE, XSI_NAMESPACE: XSI_SCHEMA_FILE, - XLINK_NAMESPACE: XLINK_SCHEMA_FILE, VC_NAMESPACE: VC_SCHEMA_FILE, } + FALLBACK_LOCATIONS = { + XLINK_NAMESPACE: XLINK_SCHEMA_FILE, + XHTML_NAMESPACE: XHTML_SCHEMA_FILE, + } def _include_schemas(self): super(XMLSchema11, self)._include_schemas() From 690a172502bb354f4e4d95956efece900f3a56d6 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Tue, 8 Oct 2019 11:07:24 +0200 Subject: [PATCH 25/36] Add logging for schema initialization and building - Add loglevel argument to schema init - Fallback location (for XLink and XHTML) moved at the end of import tentatives (full fix for issue #137) - Fix TestGlobalMaps after the remove of XLink from base schemas --- doc/api.rst | 2 - xmlschema/tests/test_factory/schema_tests.py | 7 ++- xmlschema/tests/test_meta.py | 36 +++++++------- xmlschema/validators/schema.py | 51 +++++++++++++++----- 4 files changed, 63 insertions(+), 33 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 74013fe..9e57b7c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -164,8 +164,6 @@ Resource access API .. 
autofunction:: xmlschema.normalize_url - - XSD components API ------------------ diff --git a/xmlschema/tests/test_factory/schema_tests.py b/xmlschema/tests/test_factory/schema_tests.py index fa72bc3..6796ef3 100644 --- a/xmlschema/tests/test_factory/schema_tests.py +++ b/xmlschema/tests/test_factory/schema_tests.py @@ -14,6 +14,7 @@ import pdb import os import pickle import time +import logging import warnings from xmlschema import XMLSchemaBase @@ -46,6 +47,7 @@ def make_schema_test_class(test_file, test_args, test_num, schema_class, check_w locations = test_args.locations defuse = test_args.defuse debug_mode = test_args.debug + loglevel = logging.DEBUG if debug_mode else None class TestSchema(XsdValidatorTestCase): @@ -61,9 +63,10 @@ def make_schema_test_class(test_file, test_args, test_num, schema_class, check_w def check_xsd_file(self): if expected_errors > 0: - xs = schema_class(xsd_file, validation='lax', locations=locations, defuse=defuse) + xs = schema_class(xsd_file, validation='lax', locations=locations, + defuse=defuse, loglevel=loglevel) else: - xs = schema_class(xsd_file, locations=locations, defuse=defuse) + xs = schema_class(xsd_file, locations=locations, defuse=defuse, loglevel=loglevel) self.errors.extend(xs.maps.all_errors) if inspect: diff --git a/xmlschema/tests/test_meta.py b/xmlschema/tests/test_meta.py index 1d047e5..e4d9fd3 100644 --- a/xmlschema/tests/test_meta.py +++ b/xmlschema/tests/test_meta.py @@ -281,33 +281,33 @@ class TestGlobalMaps(unittest.TestCase): def test_xsd_10_globals(self): self.assertEqual(len(XMLSchema10.meta_schema.maps.notations), 2) - self.assertEqual(len(XMLSchema10.meta_schema.maps.types), 108) - self.assertEqual(len(XMLSchema10.meta_schema.maps.attributes), 18) - self.assertEqual(len(XMLSchema10.meta_schema.maps.attribute_groups), 9) - self.assertEqual(len(XMLSchema10.meta_schema.maps.groups), 18) - self.assertEqual(len(XMLSchema10.meta_schema.maps.elements), 45) - self.assertEqual(len([e.is_global() for e in 
XMLSchema10.meta_schema.maps.iter_globals()]), 200) + self.assertEqual(len(XMLSchema10.meta_schema.maps.types), 92) + self.assertEqual(len(XMLSchema10.meta_schema.maps.attributes), 8) + self.assertEqual(len(XMLSchema10.meta_schema.maps.attribute_groups), 3) + self.assertEqual(len(XMLSchema10.meta_schema.maps.groups), 12) + self.assertEqual(len(XMLSchema10.meta_schema.maps.elements), 41) + self.assertEqual(len([e.is_global() for e in XMLSchema10.meta_schema.maps.iter_globals()]), 158) self.assertEqual(len(XMLSchema10.meta_schema.maps.substitution_groups), 0) def test_xsd_11_globals(self): self.assertEqual(len(XMLSchema11.meta_schema.maps.notations), 2) - self.assertEqual(len(XMLSchema11.meta_schema.maps.types), 119) - self.assertEqual(len(XMLSchema11.meta_schema.maps.attributes), 24) - self.assertEqual(len(XMLSchema11.meta_schema.maps.attribute_groups), 10) - self.assertEqual(len(XMLSchema11.meta_schema.maps.groups), 19) - self.assertEqual(len(XMLSchema11.meta_schema.maps.elements), 51) - self.assertEqual(len([e.is_global() for e in XMLSchema11.meta_schema.maps.iter_globals()]), 225) + self.assertEqual(len(XMLSchema11.meta_schema.maps.types), 103) + self.assertEqual(len(XMLSchema11.meta_schema.maps.attributes), 14) + self.assertEqual(len(XMLSchema11.meta_schema.maps.attribute_groups), 4) + self.assertEqual(len(XMLSchema11.meta_schema.maps.groups), 13) + self.assertEqual(len(XMLSchema11.meta_schema.maps.elements), 47) + self.assertEqual(len([e.is_global() for e in XMLSchema11.meta_schema.maps.iter_globals()]), 183) self.assertEqual(len(XMLSchema11.meta_schema.maps.substitution_groups), 1) def test_xsd_10_build(self): - self.assertEqual(len([e for e in XMLSchema10.meta_schema.maps.iter_globals()]), 200) + self.assertEqual(len([e for e in XMLSchema10.meta_schema.maps.iter_globals()]), 158) self.assertTrue(XMLSchema10.meta_schema.maps.built) XMLSchema10.meta_schema.maps.clear() XMLSchema10.meta_schema.maps.build() self.assertTrue(XMLSchema10.meta_schema.maps.built) def 
test_xsd_11_build(self): - self.assertEqual(len([e for e in XMLSchema11.meta_schema.maps.iter_globals()]), 225) + self.assertEqual(len([e for e in XMLSchema11.meta_schema.maps.iter_globals()]), 183) self.assertTrue(XMLSchema11.meta_schema.maps.built) XMLSchema11.meta_schema.maps.clear() XMLSchema11.meta_schema.maps.build() @@ -321,8 +321,8 @@ class TestGlobalMaps(unittest.TestCase): total_counter += 1 if c.is_global(): global_counter += 1 - self.assertEqual(global_counter, 200) - self.assertEqual(total_counter, 901) + self.assertEqual(global_counter, 158) + self.assertEqual(total_counter, 782) def test_xsd_11_components(self): total_counter = 0 @@ -332,8 +332,8 @@ class TestGlobalMaps(unittest.TestCase): total_counter += 1 if c.is_global(): global_counter += 1 - self.assertEqual(global_counter, 225) - self.assertEqual(total_counter, 1051) + self.assertEqual(global_counter, 183) + self.assertEqual(total_counter, 932) def test_xsd_11_restrictions(self): all_model_type = XMLSchema11.meta_schema.types['all'] diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 4903a58..0c3c8c4 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -55,6 +55,9 @@ from .wildcards import XsdAnyElement, XsdAnyAttribute, Xsd11AnyElement, \ Xsd11AnyAttribute, XsdDefaultOpenContent from .globals_ import XsdGlobals +logger = logging.getLogger('xmlschema') +logging.basicConfig(format='[%(levelname)s] %(message)s') + XSD_VERSION_PATTERN = re.compile(r'^\d+\.\d+$') # Elements for building dummy groups @@ -172,6 +175,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): meta-schema is added at the end. In the latter case the meta-schema is rebuilt if any base \ namespace has been overridden by an import. Ignored if the argument *global_maps* is provided. :type use_meta: bool + :param loglevel: for setting a different logging level for schema initialization \ + and building. For default is WARNING (30). 
+ :type loglevel: int :cvar XSD_VERSION: store the XSD version (1.0 or 1.1). :vartype XSD_VERSION: str @@ -258,10 +264,18 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): default_open_content = None override = None - def __init__(self, source, namespace=None, validation='strict', global_maps=None, converter=None, - locations=None, base_url=None, defuse='remote', timeout=300, build=True, use_meta=True): + def __init__(self, source, namespace=None, validation='strict', global_maps=None, + converter=None, locations=None, base_url=None, defuse='remote', + timeout=300, build=True, use_meta=True, loglevel=None): super(XMLSchemaBase, self).__init__(validation) + if loglevel is not None: + logger.setLevel(loglevel) + elif build and global_maps is None: + logger.setLevel(logging.WARNING) + self.source = XMLResource(source, base_url, defuse, timeout, lazy=False) + logger.debug("Read schema from %r", self.source) + self.imports = {} self.includes = {} self.warnings = [] @@ -291,6 +305,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if '' not in self.namespaces: self.namespaces[''] = namespace + logger.debug("Schema targetNamespace is %r", self.target_namespace) + logger.debug("Declared namespaces: %r", self.namespaces) + # Parses the schema defaults if 'attributeFormDefault' in root.attrib: try: @@ -321,11 +338,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): except ValueError as err: self.parse_error(err, root) - # Set locations hints self.locations = NamespaceResourcesMap(self.source.get_locations(locations)) - if self.meta_schema is not None: - self.locations.update(self.FALLBACK_LOCATIONS) - self.converter = self.get_converter(converter) self.xpath_proxy = XMLSchemaProxy(self) self.empty_attribute_group = self.BUILDERS.attribute_group_class( @@ -396,8 +409,12 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.default_open_content = XsdDefaultOpenContent(child, self) 
break - if build: - self.maps.build() + try: + if build: + self.maps.build() + finally: + if loglevel is not None: + logger.setLevel(logging.WARNING) # Restore default logging def __repr__(self): if self.url: @@ -829,7 +846,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): """Processes schema document inclusions and redefinitions.""" for child in filter(lambda x: x.tag == XSD_INCLUDE, self.root): try: - self.include_schema(child.attrib['schemaLocation'], self.base_url) + location = child.attrib['schemaLocation'].strip() + logger.info("Include schema from %r", location) + self.include_schema(location, self.base_url) except KeyError: pass except (OSError, IOError) as err: @@ -850,7 +869,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for child in filter(lambda x: x.tag == XSD_REDEFINE, self.root): try: - schema = self.include_schema(child.attrib['schemaLocation'], self.base_url) + location = child.attrib['schemaLocation'].strip() + logger.info("Redefine schema %r", location) + schema = self.include_schema(location, self.base_url) except KeyError: pass # Attribute missing error already found by validation against meta-schema except (OSError, IOError) as err: @@ -940,13 +961,18 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if local_hints: locations = local_hints + locations + if namespace in self.FALLBACK_LOCATIONS: + locations.append(self.FALLBACK_LOCATIONS[namespace]) + import_error = None for url in locations: try: + logger.debug("Import namespace %r from %r", namespace, url) self.import_schema(namespace, url, self.base_url) except (OSError, IOError) as err: # It's not an error if the location access fails (ref. 
section 4.2.6.2): # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#composition-schemaImport + logger.debug('%s', err) if import_error is None: import_error = err except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: @@ -963,6 +989,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): except XMLSchemaValueError as err: self.parse_error(err) else: + logger.info("Namespace %r imported from %r", namespace, url) break else: if import_error is not None: @@ -1505,7 +1532,9 @@ class XMLSchema11(XMLSchemaBase): for child in filter(lambda x: x.tag == XSD_OVERRIDE, self.root): try: - schema = self.include_schema(child.attrib['schemaLocation'], self.base_url) + location = child.attrib['schemaLocation'].strip() + logger.info("Override schema %r", location) + schema = self.include_schema(location, self.base_url) except KeyError: pass # Attribute missing error already found by validation against meta-schema except (OSError, IOError) as err: From 75664150e6e7929cf7627d67ab5c7684d63c07d1 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Wed, 9 Oct 2019 14:59:42 +0200 Subject: [PATCH 26/36] Improve XPath bindings - Extend ElementPathMixin with instance _xpath_parser - Add xpath_tokens dictionary to schema objects - Fix serialization removing xpath_tokens classes --- xmlschema/validators/assertions.py | 11 ++- xmlschema/validators/elements.py | 16 ++-- xmlschema/validators/schema.py | 21 ++++-- xmlschema/validators/wildcards.py | 5 +- xmlschema/xpath.py | 116 ++++++++++++++++++----------- 5 files changed, 109 insertions(+), 60 deletions(-) diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index c2ddca5..d57c532 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -57,8 +57,6 @@ class XsdAssert(XsdComponent, ElementPathMixin): else: self.xpath_default_namespace = self.schema.xpath_default_namespace - self.xpath_proxy = XMLSchemaProxy(self.schema, 
self) - @property def built(self): return self.token is not None and (self.base_type.parent is None or self.base_type.built) @@ -77,7 +75,11 @@ class XsdAssert(XsdComponent, ElementPathMixin): variables = None self.parser = XPath2Parser( - self.namespaces, variables, False, self.xpath_default_namespace, schema=self.xpath_proxy + namespaces=self.namespaces, + variables=variables, + strict=False, + default_namespace=self.xpath_default_namespace, + schema=XMLSchemaProxy(self.schema, self) ) try: @@ -89,6 +91,8 @@ class XsdAssert(XsdComponent, ElementPathMixin): def __call__(self, elem, value=None, source=None, namespaces=None, **kwargs): if value is not None: self.parser.variables['value'] = self.base_type.text_decode(value) + if not self.parser.is_schema_bound(): + self.parser.schema.bind_parser(self.parser) if source is None: context = XPathContext(root=elem) @@ -96,7 +100,6 @@ class XsdAssert(XsdComponent, ElementPathMixin): context = XPathContext(root=source.root, item=elem) default_namespace = self.parser.namespaces[''] - if namespaces and '' in namespaces: self.parser.namespaces[''] = namespaces[''] diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index d849806..5a39819 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -96,8 +96,11 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def __setattr__(self, name, value): if name == "type": - assert value is None or isinstance(value, XsdType), "Wrong value %r for attribute 'type'." 
% value - self.attributes = self.get_attributes(value) + assert value is None or isinstance(value, XsdType) + try: + self.attributes = value.attributes + except AttributeError: + self.attributes = self.schema.create_empty_attribute_group(self) super(XsdElement, self).__setattr__(name, value) def __iter__(self): @@ -105,6 +108,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) for e in self.type.content_type.iter_elements(): yield e + @property + def xpath_proxy(self): + return XMLSchemaProxy(self.schema, self) + def _parse(self): XsdComponent._parse(self) self._parse_attributes() @@ -112,7 +119,6 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) self._parse_identity_constraints(index) if self.parent is None and 'substitutionGroup' in self.elem.attrib: self._parse_substitution_group(self.elem.attrib['substitutionGroup']) - self.xpath_proxy = XMLSchemaProxy(self.schema, self) def _parse_attributes(self): self._parse_particle(self.elem) @@ -390,7 +396,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) try: return xsd_type.attributes except AttributeError: - return self.schema.empty_attribute_group + return self.attributes def get_path(self, ancestor=None, reverse=False): """ @@ -848,8 +854,6 @@ class Xsd11Element(XsdElement): if any(v.inheritable for v in self.attributes.values()): self.inheritable = {k: v for k, v in self.attributes.items() if v.inheritable} - self.xpath_proxy = XMLSchemaProxy(self.schema, self) - def _parse_alternatives(self, index=0): if self.ref is not None: self.alternatives = self.ref.alternatives diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 0c3c8c4..faeec6e 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -216,8 +216,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :vartype maps: XsdGlobals :ivar converter: the default converter used for XML 
data decoding/encoding. :vartype converter: XMLSchemaConverter - :ivar xpath_proxy: a proxy for XPath operations on schema components. - :vartype xpath_proxy: XMLSchemaProxy :ivar locations: schema location hints. :vartype locations: NamespaceResourcesMap :ivar namespaces: a dictionary that maps from the prefixes used by the schema into namespace URI. @@ -340,10 +338,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.locations = NamespaceResourcesMap(self.source.get_locations(locations)) self.converter = self.get_converter(converter) - self.xpath_proxy = XMLSchemaProxy(self) - self.empty_attribute_group = self.BUILDERS.attribute_group_class( - etree_element(XSD_ATTRIBUTE_GROUP), self, self - ) + self.xpath_tokens = {} # Create or set the XSD global maps instance if self.meta_schema is None: @@ -416,6 +411,16 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if loglevel is not None: logger.setLevel(logging.WARNING) # Restore default logging + def __getstate__(self): + state = self.__dict__.copy() + del state['xpath_tokens'] + state.pop('_xpath_parser', None) + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.xpath_tokens = {} + def __repr__(self): if self.url: basename = os.path.basename(self.url) @@ -457,6 +462,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): def __len__(self): return len(self.elements) + @property + def xpath_proxy(self): + return XMLSchemaProxy(self) + @property def xsd_version(self): """Property that returns the class attribute XSD_VERSION.""" diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index cb4c8d7..aa8e23b 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -395,10 +395,13 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): self.__class__.__name__, self.not_namespace, self.process_contents, self.occurs ) + @property + def 
xpath_proxy(self): + return XMLSchemaProxy(self.schema, self) + def _parse(self): super(XsdAnyElement, self)._parse() self._parse_particle(self.elem) - self.xpath_proxy = XMLSchemaProxy(self.schema, self) def match(self, name, default_namespace=None, resolve=False, **kwargs): """ diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 1c48722..8a215da 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -93,6 +93,20 @@ class XMLSchemaProxy(AbstractSchemaProxy): except AttributeError: raise XMLSchemaTypeError("%r is not an XsdElement" % base_element) + def bind_parser(self, parser): + if parser.schema is not self: + parser.schema = self + + try: + parser.symbol_table = self._schema.xpath_tokens[parser.__class__] + except KeyError: + parser.symbol_table = parser.__class__.symbol_table.copy() + self._schema.xpath_tokens[parser.__class__] = parser.symbol_table + for xsd_type in self.iter_atomic_types(): + parser.schema_constructor(xsd_type.name) + + parser.tokenizer = parser.create_tokenizer(parser.symbol_table) + def get_context(self): return XMLSchemaContext(root=self._schema, item=self._base_element) @@ -166,7 +180,13 @@ class ElementPathMixin(Sequence): attributes = {} namespaces = {} xpath_default_namespace = None - xpath_proxy = None + + _xpath_parser = None # Internal XPath 2.0 parser, instantiated at first use. + + def __getstate__(self): + state = self.__dict__.copy() + state.pop('_xpath_parser', None) + return state @abstractmethod def __iter__(self): @@ -198,48 +218,56 @@ class ElementPathMixin(Sequence): """Gets an Element attribute. 
For compatibility with the ElementTree API.""" return self.attributes.get(key, default) - def iterfind(self, path, namespaces=None): + @property + def xpath_proxy(self): + """Returns an XPath proxy instance bound with the schema.""" + raise NotImplementedError + + def _rebind_xpath_parser(self): + """Rebind XPath 2 parser with schema component.""" + if self._xpath_parser is not None: + self._xpath_parser.schema.bind_parser(self._xpath_parser) + + def _get_xpath_namespaces(self, namespaces=None): """ - Creates and iterator for all XSD subelements matching the path. + Returns a dictionary with namespaces for XPath selection. - :param path: an XPath expression that considers the XSD component as the root element. - :param namespaces: is an optional mapping from namespace prefix to full name. - :return: an iterable yielding all matching XSD subelements in document order. + :param namespaces: an optional map from namespace prefix to namespace URI. \ + If this argument is not provided the schema's namespaces are used. """ - path = path.strip() - if path.startswith('/') and not path.startswith('//'): - path = ''.join(['/', XSD_SCHEMA, path]) - if namespaces is None: - namespaces = {k: v for k, v in self.namespaces.items() if k} - - parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy, - default_namespace=self.xpath_default_namespace) - root_token = parser.parse(path) - context = XMLSchemaContext(self) - return root_token.select_results(context) - - def find(self, path, namespaces=None): - """ - Finds the first XSD subelement matching the path. - - :param path: an XPath expression that considers the XSD component as the root element. - :param namespaces: an optional mapping from namespace prefix to full name. - :return: The first matching XSD subelement or ``None`` if there is not match. 
- """ - path = path.strip() - if path.startswith('/') and not path.startswith('//'): - path = ''.join(['/', XSD_SCHEMA, path]) - if namespaces is None: namespaces = {k: v for k, v in self.namespaces.items() if k} namespaces[''] = self.xpath_default_namespace elif '' not in namespaces: namespaces[''] = self.xpath_default_namespace - parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy) - root_token = parser.parse(path) + xpath_namespaces = XPath2Parser.DEFAULT_NAMESPACES.copy() + xpath_namespaces.update(namespaces) + return xpath_namespaces + + def _xpath_parse(self, path, namespaces=None): + path = path.strip() + if path.startswith('/') and not path.startswith('//'): + path = ''.join(['/', XSD_SCHEMA, path]) + + namespaces = self._get_xpath_namespaces(namespaces) + if self._xpath_parser is None: + self._xpath_parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy) + else: + self._xpath_parser.namespaces = namespaces + + return self._xpath_parser.parse(path) + + def find(self, path, namespaces=None): + """ + Finds the first XSD subelement matching the path. + + :param path: an XPath expression that considers the XSD component as the root element. + :param namespaces: an optional mapping from namespace prefix to namespace URI. + :return: The first matching XSD subelement or ``None`` if there is not match. + """ context = XMLSchemaContext(self) - return next(root_token.select_results(context), None) + return next(self._xpath_parse(path, namespaces).select_results(context), None) def findall(self, path, namespaces=None): """ @@ -250,17 +278,19 @@ class ElementPathMixin(Sequence): :return: a list containing all matching XSD subelements in document order, an empty \ list is returned if there is no match. 
""" - path = path.strip() - if path.startswith('/') and not path.startswith('//'): - path = ''.join(['/', XSD_SCHEMA, path]) - if namespaces is None: - namespaces = {k: v for k, v in self.namespaces.items() if k} - - parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy, - default_namespace=self.xpath_default_namespace) - root_token = parser.parse(path) context = XMLSchemaContext(self) - return root_token.get_results(context) + return self._xpath_parse(path, namespaces).get_results(context) + + def iterfind(self, path, namespaces=None): + """ + Creates and iterator for all XSD subelements matching the path. + + :param path: an XPath expression that considers the XSD component as the root element. + :param namespaces: is an optional mapping from namespace prefix to full name. + :return: an iterable yielding all matching XSD subelements in document order. + """ + context = XMLSchemaContext(self) + return self._xpath_parse(path, namespaces).select_results(context) def iter(self, tag=None): """ From 922a43da21fc243214ff51321949d38e9a2795cc Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 10 Oct 2019 11:23:52 +0200 Subject: [PATCH 27/36] Fix for unbound multi-schema W3C tests --- xmlschema/tests/test_w3c_suite.py | 50 ++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index a9d4d77..dbeb25c 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -244,6 +244,13 @@ def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 if test_conf: test_conf['source'] = source_path + if schema_test and not source_path.endswith('.xml'): + test_conf['sources'] = [ + os.path.normpath( + os.path.join(os.path.dirname(filename), schema_href.get('{%s}href' % XLINK_NAMESPACE)) + ) + for schema_href in elem.findall(tag) + ] return test_conf if group_num == 1: @@ -283,25 +290,37 @@ def 
create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 class TestGroupCase(unittest.TestCase): - @unittest.skipIf(not any(g['source'].endswith('.xsd') for g in group_tests), 'No schema tests') + @unittest.skipIf(group_tests[0]['source'].endswith('.xml'), 'No schema test') def test_xsd_schema(self): for item in filter(lambda x: x['source'].endswith('.xsd'), group_tests): source = item['source'] rel_path = os.path.relpath(source) - for version, expected in sorted(filter(lambda x: x[0] != 'source', item.items())): + for version, expected in sorted(filter(lambda x: not x[0].startswith('source'), item.items())): schema_class = XMLSchema11 if version == '1.1' else XMLSchema10 if expected == 'invalid': message = "schema %s should be invalid with XSD %s" % (rel_path, version) with self.assertRaises(XMLSchemaException, msg=message): with warnings.catch_warnings(): warnings.simplefilter('ignore') - schema_class(source, use_meta=False) + if len(item['sources']) <= 1: + schema_class(source, use_meta=False) + else: + schema = schema_class(source, use_meta=False, build=False) + for other in item['sources'][1:]: + schema_class(other, global_maps=schema.maps, build=False) + schema.build() else: try: with warnings.catch_warnings(): warnings.simplefilter('ignore') - schema = schema_class(source, use_meta=False) + if len(item['sources']) <= 1: + schema = schema_class(source, use_meta=False) + else: + schema = schema_class(source, use_meta=False, build=False) + for other in item['sources'][1:]: + schema_class(other, global_maps=schema.maps, build=False) + schema.build() except XMLSchemaException as err: schema = None message = "schema %s should be valid with XSD %s, but an error is raised:" \ @@ -311,12 +330,14 @@ def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 self.assertIsInstance(schema, schema_class, msg=message) - @unittest.skipIf(not any(g['source'].endswith('.xml') for g in group_tests), 'No instance tests') + 
@unittest.skipIf(group_tests[0]['source'].endswith('.xsd') and len(group_tests) == 1, 'No instance tests') def test_xml_instances(self): if group_tests[0]['source'].endswith('.xsd'): schema = group_tests[0]['source'] + schemas = group_tests[0]['sources'] else: schema = None + schemas = [] for item in filter(lambda x: not x['source'].endswith('.xsd'), group_tests): source = item['source'] @@ -329,12 +350,27 @@ def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0 with self.assertRaises((XMLSchemaException, ElementTree.ParseError), msg=message): with warnings.catch_warnings(): warnings.simplefilter('ignore') - validate(source, schema=schema, cls=schema_class) + if len(schemas) <= 1: + validate(source, schema=schema, cls=schema_class) + else: + xs = schema_class(schemas[0], use_meta=False, build=False) + for other in schemas[1:]: + schema_class(other, global_maps=xs.maps, build=False) + xs.build() + xs.validate(source) else: try: with warnings.catch_warnings(): warnings.simplefilter('ignore') - validate(source, schema=schema, cls=schema_class) + if len(schemas) <= 1: + validate(source, schema=schema, cls=schema_class) + else: + xs = schema_class(schemas[0], use_meta=False, build=False) + for other in schemas[1:]: + schema_class(other, global_maps=xs.maps, build=False) + xs.build() + xs.validate(source) + except (XMLSchemaException, ElementTree.ParseError) as err: error = "instance %s should be valid with XSD %s, but an error " \ "is raised:\n\n%s" % (rel_path, version, str(err)) From 9146d94d4362aad1a2410df08713a37d272c8661 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 10 Oct 2019 14:20:58 +0200 Subject: [PATCH 28/36] Fix assertion value for schema context analisys --- xmlschema/validators/assertions.py | 19 +++++++++++-------- xmlschema/validators/facets.py | 7 ++++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index d57c532..ee7ae19 
100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -9,7 +9,8 @@ # @author Davide Brunato # from __future__ import unicode_literals -from elementpath import datatypes, XPath2Parser, XPathContext, ElementPathError +from elementpath import XPath2Parser, XPathContext, ElementPathError +from elementpath.datatypes import XSD_BUILTIN_TYPES from ..qnames import XSD_ASSERT from ..xpath import ElementPathMixin, XMLSchemaProxy @@ -62,17 +63,15 @@ class XsdAssert(XsdComponent, ElementPathMixin): return self.token is not None and (self.base_type.parent is None or self.base_type.built) def parse_xpath_test(self): - if self.base_type.has_simple_content(): - variables = {'value': datatypes.XSD_BUILTIN_TYPES['anyType'].value} - elif self.base_type.is_complex(): + if not self.base_type.has_simple_content(): + variables = {'value': XSD_BUILTIN_TYPES['anyType'].value} + else: try: builtin_type_name = self.base_type.content_type.primitive_type.local_name except AttributeError: - variables = {'value': datatypes.XSD_BUILTIN_TYPES['anySimpleType'].value} + variables = {'value': XSD_BUILTIN_TYPES['anySimpleType'].value} else: - variables = {'value': datatypes.XSD_BUILTIN_TYPES[builtin_type_name].value} - else: - variables = None + variables = {'value': XSD_BUILTIN_TYPES[builtin_type_name].value} self.parser = XPath2Parser( namespaces=self.namespaces, @@ -125,3 +124,7 @@ class XsdAssert(XsdComponent, ElementPathMixin): @property def type(self): return self.parent + + @property + def xpath_proxy(self): + return XMLSchemaProxy(self.schema, self) diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index 7e56574..e018229 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -14,7 +14,8 @@ This module contains declarations and classes for XML Schema constraint facets. 
from __future__ import unicode_literals import re import operator -from elementpath import XPath2Parser, ElementPathError, datatypes +from elementpath import XPath2Parser, ElementPathError +from elementpath.datatypes import XSD_BUILTIN_TYPES from ..compat import unicode_type, MutableSequence from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ @@ -713,9 +714,9 @@ class XsdAssertionFacet(XsdFacet): try: builtin_type_name = self.base_type.primitive_type.local_name - variables = {'value': datatypes.XSD_BUILTIN_TYPES[builtin_type_name].value} + variables = {'value': XSD_BUILTIN_TYPES[builtin_type_name].value} except AttributeError: - variables = {'value': datatypes.XSD_BUILTIN_TYPES['anySimpleType'].value} + variables = {'value': XSD_BUILTIN_TYPES['anySimpleType'].value} if 'xpathDefaultNamespace' in self.elem.attrib: self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) From 1a06be74771e7525673ee7d88985d3010b579eca Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 10 Oct 2019 14:21:55 +0200 Subject: [PATCH 29/36] Fix the parse of keyref's refer to skip key references --- xmlschema/validators/identities.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 58d2d0e..1e51d95 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -269,9 +269,10 @@ class XsdKeyref(XsdIdentity): elif isinstance(self.refer, (XsdKey, XsdUnique)): return # referenced key/unique identity constraint already set - try: - self.refer = self.parent.identities[self.refer] - except KeyError: + refer = self.parent.identities.get(self.refer) + if refer is not None and refer.ref is None: + self.refer = refer + else: try: self.refer = self.maps.identities[self.refer] except KeyError: From 588f17a1f9a59e25dfb62d57db248364bfba92f7 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 10 Oct 2019 20:34:40 
+0200 Subject: [PATCH 30/36] Fix xs:error type decoding/encoding --- xmlschema/validators/simple_types.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 2e9fd63..0e8cb46 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -549,6 +549,11 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.decode_error(validation, obj, self.to_python, reason=str(err)) yield None return + except TypeError: + # xs:error type (eg. an XSD 1.1 type alternative used to catch invalid values) + yield self.validation_error(validation, "Invalid value {!r}".format(obj)) + yield None + return for validator in self.validators: for error in validator(result): @@ -587,6 +592,10 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.encode_error(validation, obj, self.from_python) yield None return + except TypeError: + yield self.validation_error(validation, "Invalid value {!r}".format(obj)) + yield None + return for validator in self.validators: for error in validator(obj): From 997c59c837c4c526a86c88de6bb8b64d67b8abc7 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Thu, 10 Oct 2019 20:35:45 +0200 Subject: [PATCH 31/36] Fix locations argument usage - Used to import other namespaces explicitly (as disposed by "xsi:schemaLocation" in an XML instance). - It's not propagated to included/imported schemas anymore. 
--- xmlschema/validators/schema.py | 123 +++++++++++------- .../schemas/XSD_1.1/xsd11-extra.xsd | 4 +- 2 files changed, 75 insertions(+), 52 deletions(-) diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index faeec6e..3c71311 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -158,8 +158,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :param converter: is an optional argument that can be an :class:`XMLSchemaConverter` \ subclass or instance, used for defining the default XML data converter for XML Schema instance. :type converter: XMLSchemaConverter or None - :param locations: schema location hints for namespace imports. Can be a dictionary or \ - a sequence of couples (namespace URI, resource URL). + :param locations: schema location hints, that can include additional namespaces to \ + import after processing schema's import statements. Usually filled with the couples \ + (namespace, url) extracted from xsi:schemaLocations. Can be a dictionary or a sequence \ + of couples (namespace URI, resource URL). :type locations: dict or list or None :param base_url: is an optional base URL, used for the normalization of relative paths \ when the URL of the schema resource can't be obtained from the source argument. @@ -216,7 +218,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :vartype maps: XsdGlobals :ivar converter: the default converter used for XML data decoding/encoding. :vartype converter: XMLSchemaConverter - :ivar locations: schema location hints. + :ivar locations: schemas location hints. :vartype locations: NamespaceResourcesMap :ivar namespaces: a dictionary that maps from the prefixes used by the schema into namespace URI. 
:vartype namespaces: dict @@ -349,7 +351,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): elif global_maps is None: if use_meta is False: self.maps = XsdGlobals(self, validation) - self.locations.update(self.BASE_SCHEMAS) elif self.target_namespace not in self.BASE_SCHEMAS: if not self.meta_schema.maps.types: self.meta_schema.maps.build() @@ -384,9 +385,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for e in self.meta_schema.iter_errors(root, namespaces=self.namespaces): self.parse_error(e.reason, elem=e.elem) - # Includes and imports schemas (errors are treated as warnings) - self._include_schemas() - self._import_namespaces() + # Inclusions and imports schemas (errors are treated as warnings) + self._parse_inclusions() + self._parse_imports() + + # Imports by argument (usually from XML schemaLocation attribute). + for ns in self.locations: + if ns not in self.maps.namespaces: + self._import_namespace(ns, self.locations[ns]) if '' not in self.namespaces: self.namespaces[''] = '' # For default local names are mapped to no namespace @@ -612,9 +618,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :param source: an optional argument referencing to or containing the XSD meta-schema \ resource. Required if the schema class doesn't already have a meta-schema. - :param base_schemas: an optional dictionary that contains namespace URIs and schema locations. \ - If provided it's used as substitute for class 's BASE_SCHEMAS. Also a sequence of (namespace, \ - location) items can be provided if there are more schema documents for one or more namespaces. + :param base_schemas: an optional dictionary that contains namespace URIs and \ + schema locations. If provided it's used as substitute for class 's BASE_SCHEMAS. \ + Also a sequence of (namespace, location) items can be provided if there are more \ + schema documents for one or more namespaces. 
:param global_maps: is an optional argument containing an :class:`XsdGlobals` \ instance for the new meta schema. If not provided a new map is created. """ @@ -851,7 +858,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): else: return self.find(path, namespaces) - def _include_schemas(self): + def _parse_inclusions(self): """Processes schema document inclusions and redefinitions.""" for child in filter(lambda x: x.tag == XSD_INCLUDE, self.root): try: @@ -915,8 +922,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): break else: schema = self.create_schema( - schema_url, self.target_namespace, self.validation, self.maps, self.converter, - self.locations, self.base_url, self.defuse, self.timeout, False + source=schema_url, + namespace=self.target_namespace, + validation=self.validation, + global_maps=self.maps, + converter=self.converter, + base_url=self.base_url, + defuse=self.defuse, + timeout=self.timeout, + build=False, ) if location not in self.includes: @@ -925,10 +939,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.includes[schema_url] = schema return schema - def _import_namespaces(self): + def _parse_imports(self): """ - Processes namespace imports. Imports are done on namespace basis not on resource: this - is the standard and also avoids import loops that sometimes are hard to detect. + Parse namespace import elements. Imports are done on namespace basis, not on + single resource. A warning is generated for a failure of a namespace import. 
""" namespace_imports = NamespaceResourcesMap(map( lambda x: (x.get('namespace'), x.get('schemaLocation')), @@ -973,38 +987,41 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if namespace in self.FALLBACK_LOCATIONS: locations.append(self.FALLBACK_LOCATIONS[namespace]) - import_error = None - for url in locations: - try: - logger.debug("Import namespace %r from %r", namespace, url) - self.import_schema(namespace, url, self.base_url) - except (OSError, IOError) as err: - # It's not an error if the location access fails (ref. section 4.2.6.2): - # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#composition-schemaImport - logger.debug('%s', err) - if import_error is None: - import_error = err - except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: - if namespace: - msg = "cannot import namespace %r: %s." % (namespace, err) - else: - msg = "cannot import chameleon schema: %s." % err - if isinstance(err, (XMLSchemaParseError, ParseError)): - self.parse_error(msg) - elif self.validation == 'strict': - raise type(err)(msg) - else: - self.errors.append(type(err)(msg)) - except XMLSchemaValueError as err: - self.parse_error(err) + self._import_namespace(namespace, locations) + + def _import_namespace(self, namespace, locations): + import_error = None + for url in locations: + try: + logger.debug("Import namespace %r from %r", namespace, url) + self.import_schema(namespace, url, self.base_url) + except (OSError, IOError) as err: + # It's not an error if the location access fails (ref. section 4.2.6.2): + # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#composition-schemaImport + logger.debug('%s', err) + if import_error is None: + import_error = err + except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: + if namespace: + msg = "cannot import namespace %r: %s." 
% (namespace, err) else: - logger.info("Namespace %r imported from %r", namespace, url) - break + msg = "cannot import chameleon schema: %s." % err + if isinstance(err, (XMLSchemaParseError, ParseError)): + self.parse_error(msg) + elif self.validation == 'strict': + raise type(err)(msg) + else: + self.errors.append(type(err)(msg)) + except XMLSchemaValueError as err: + self.parse_error(err) else: - if import_error is not None: - self.warnings.append("Namespace import failed: %s." % str(import_error)) - warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=3) - self.imports[namespace] = None + logger.info("Namespace %r imported from %r", namespace, url) + break + else: + if import_error is not None: + self.warnings.append("Namespace import failed: %s." % str(import_error)) + warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=3) + self.imports[namespace] = None def import_schema(self, namespace, location, base_url=None, force=False): """ @@ -1033,8 +1050,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): return schema schema = self.create_schema( - schema_url, None, self.validation, self.maps, self.converter, - self.locations, self.base_url, self.defuse, self.timeout, False + source=schema_url, + validation=self.validation, + global_maps=self.maps, + converter=self.converter, + base_url=self.base_url, + defuse=self.defuse, + timeout=self.timeout, + build=False, ) if schema.target_namespace != namespace: raise XMLSchemaValueError('imported schema %r has an unmatched namespace %r' % (location, namespace)) @@ -1536,8 +1559,8 @@ class XMLSchema11(XMLSchemaBase): XHTML_NAMESPACE: XHTML_SCHEMA_FILE, } - def _include_schemas(self): - super(XMLSchema11, self)._include_schemas() + def _parse_inclusions(self): + super(XMLSchema11, self)._parse_inclusions() for child in filter(lambda x: x.tag == XSD_OVERRIDE, self.root): try: diff --git a/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd 
b/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd index fb26c03..ba49a10 100644 --- a/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd +++ b/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd @@ -3,7 +3,7 @@ Chameleon schema for defining XSD 1.1 list type builtins and to override openContent/defaultOpenContent declarations for the xmlschema library. --> - + @@ -104,4 +104,4 @@ openContent/defaultOpenContent declarations for the xmlschema library. - \ No newline at end of file + From a79a5583ae081f86605bb80635c27aa6d19a3870 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 11 Oct 2019 13:26:17 +0200 Subject: [PATCH 32/36] Fix datetime types validation on encoding - Add is_datetime() to XSD types classes --- xmlschema/etree.py | 2 +- xmlschema/validators/schema.py | 3 +- xmlschema/validators/simple_types.py | 50 ++++++++++++---------------- xmlschema/validators/xsdbase.py | 9 ++++- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/xmlschema/etree.py b/xmlschema/etree.py index d79d7da..b235f48 100644 --- a/xmlschema/etree.py +++ b/xmlschema/etree.py @@ -23,7 +23,7 @@ except ImportError: lxml_etree = None from .compat import PY3 -from .exceptions import XMLSchemaTypeError, XMLSchemaValueError +from .exceptions import XMLSchemaTypeError from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE, get_namespace from .qnames import get_qname, qname_to_prefixed diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 3c71311..2f4791d 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -385,11 +385,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for e in self.meta_schema.iter_errors(root, namespaces=self.namespaces): self.parse_error(e.reason, elem=e.elem) - # Inclusions and imports schemas (errors are treated as warnings) self._parse_inclusions() self._parse_imports() - # Imports by argument (usually from XML schemaLocation attribute). 
+ # Imports by argument (usually from xsi:schemaLocation attribute). for ns in self.locations: if ns not in self.maps.namespaces: self._import_namespace(ns, self.locations[ns]) diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index 0e8cb46..62bed94 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -400,6 +400,7 @@ class XsdAtomic(XsdSimpleType): a base_type attribute that refers to primitive or derived atomic built-in type or another derived simpleType. """ + to_python = str _special_types = {XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE} _ADMITTED_TAGS = {XSD_RESTRICTION, XSD_SIMPLE_TYPE} @@ -502,6 +503,9 @@ class XsdAtomicBuiltin(XsdAtomic): def admitted_facets(self): return self._admitted_facets or self.primitive_type.admitted_facets + def is_datetime(self): + return self.to_python.__name__ == 'fromstring' + def iter_decode(self, obj, validation='lax', **kwargs): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) @@ -1151,35 +1155,21 @@ class XsdAtomicRestriction(XsdAtomic): if self.is_list(): if not hasattr(obj, '__iter__') or isinstance(obj, (str, unicode_type, bytes)): obj = [] if obj is None or obj == '' else [obj] - - if validation != 'skip' and obj is not None: - for validator in self.validators: - for error in validator(obj): - yield error - - for result in self.base_type.iter_encode(obj, validation): - if isinstance(result, XMLSchemaValidationError): - yield result - if isinstance(result, XMLSchemaEncodeError): - yield unicode_type(obj) if validation == 'skip' else None - return - else: - yield result - return - - if isinstance(obj, (string_base_type, bytes)): - obj = self.normalize(obj) - - if self.base_type.is_simple(): base_type = self.base_type - elif self.base_type.has_simple_content(): - base_type = self.base_type.content_type - elif self.base_type.mixed: - yield unicode_type(obj) - return else: - raise XMLSchemaValueError("wrong 
base type %r: a simpleType or a complexType with " - "simple or mixed content required." % self.base_type) + if isinstance(obj, (string_base_type, bytes)): + obj = self.normalize(obj) + + if self.base_type.is_simple(): + base_type = self.base_type + elif self.base_type.has_simple_content(): + base_type = self.base_type.content_type + elif self.base_type.mixed: + yield unicode_type(obj) + return + else: + raise XMLSchemaValueError("wrong base type %r: a simpleType or a complexType with " + "simple or mixed content required." % self.base_type) for result in base_type.iter_encode(obj, validation): if isinstance(result, XMLSchemaValidationError): @@ -1188,7 +1178,11 @@ class XsdAtomicRestriction(XsdAtomic): yield unicode_type(obj) if validation == 'skip' else None return else: - if validation != 'skip' and obj is not None: + if validation != 'skip' and self.validators and obj is not None: + if isinstance(obj, (string_base_type, bytes)): + if self.primitive_type.is_datetime(): + obj = self.primitive_type.to_python(obj) + for validator in self.validators: for error in validator(obj): yield error diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index cf450ce..152f0ee 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -625,7 +625,14 @@ class XsdType(XsdComponent): @staticmethod def is_atomic(): """Returns `True` if the instance is an atomic simpleType, `False` otherwise.""" - return None + return False + + @staticmethod + def is_datetime(): + """ + Returns `True` if the instance is a datetime/duration XSD builtin-type, `False` otherwise. 
+ """ + return False def is_empty(self): """Returns `True` if the instance has an empty value or content, `False` otherwise.""" From d89a597c82d6f2ff468228c25e6664642d1812f5 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Fri, 11 Oct 2019 13:44:10 +0200 Subject: [PATCH 33/36] Update documentation and fix PEP8 errors --- CHANGELOG.rst | 8 ++++++++ doc/usage.rst | 18 +++++++++--------- publiccode.yml | 2 +- xmlschema/qnames.py | 1 - .../tests/validators/test_complex_types.py | 4 ++-- xmlschema/tests/validators/test_wildcards.py | 2 +- xmlschema/validators/exceptions.py | 2 +- xmlschema/validators/wildcards.py | 6 ++++-- 8 files changed, 26 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f533006..f1417d1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,13 @@ CHANGELOG ********* +`v1.0.15`_ (2019-10-11) +======================= +* Improved XPath 2.0 bindings +* Added logging for schema initialization and building +* Removed XLink namespace from meta-schema (loaded from a fallback location like XHTML) +* Fixed half of failed W3C instance tests (remain 266 over 15344 tests) + `v1.0.14`_ (2019-08-27) ======================= * Added XSD 1.1 validator with class *XMLSchema11* @@ -256,3 +263,4 @@ v0.9.6 (2017-05-05) .. _v1.0.11: https://github.com/brunato/xmlschema/compare/v1.0.10...v1.0.11 .. _v1.0.13: https://github.com/brunato/xmlschema/compare/v1.0.11...v1.0.13 .. _v1.0.14: https://github.com/brunato/xmlschema/compare/v1.0.13...v1.0.14 +.. 
_v1.0.15: https://github.com/brunato/xmlschema/compare/v1.0.14...v1.0.15 diff --git a/doc/usage.rst b/doc/usage.rst index 9a8dbda..fda3cde 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -103,21 +103,21 @@ The global maps can be accessed through :attr:`XMLSchema.maps` attribute: >>> from pprint import pprint >>> pprint(sorted(schema.maps.types.keys())[:5]) ['{http://example.com/vehicles}vehicleType', - '{http://www.w3.org/1999/xlink}actuateType', - '{http://www.w3.org/1999/xlink}arcType', - '{http://www.w3.org/1999/xlink}arcroleType', - '{http://www.w3.org/1999/xlink}extended'] + '{http://www.w3.org/2001/XMLSchema}ENTITIES', + '{http://www.w3.org/2001/XMLSchema}ENTITY', + '{http://www.w3.org/2001/XMLSchema}ID', + '{http://www.w3.org/2001/XMLSchema}IDREF'] >>> pprint(sorted(schema.maps.elements.keys())[:10]) ['{http://example.com/vehicles}bikes', '{http://example.com/vehicles}cars', '{http://example.com/vehicles}vehicles', - '{http://www.w3.org/1999/xlink}arc', - '{http://www.w3.org/1999/xlink}locator', - '{http://www.w3.org/1999/xlink}resource', - '{http://www.w3.org/1999/xlink}title', '{http://www.w3.org/2001/XMLSchema}all', '{http://www.w3.org/2001/XMLSchema}annotation', - '{http://www.w3.org/2001/XMLSchema}any'] + '{http://www.w3.org/2001/XMLSchema}any', + '{http://www.w3.org/2001/XMLSchema}anyAttribute', + '{http://www.w3.org/2001/XMLSchema}appinfo', + '{http://www.w3.org/2001/XMLSchema}attribute', + '{http://www.w3.org/2001/XMLSchema}attributeGroup'] Schema objects include methods for finding XSD elements and attributes in the schema. 
Those are methods ot the ElementTree's API, so you can use an XPath expression for diff --git a/publiccode.yml b/publiccode.yml index bd8ed3f..ce4e5e4 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,7 +6,7 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2019-xx-xx' +releaseDate: '2019-10-11' softwareVersion: v1.0.15 developmentStatus: stable platforms: diff --git a/xmlschema/qnames.py b/xmlschema/qnames.py index ae5ec65..eb4f27d 100644 --- a/xmlschema/qnames.py +++ b/xmlschema/qnames.py @@ -185,7 +185,6 @@ XSD_YEAR_MONTH_DURATION = XSD_TEMPLATE % 'yearMonthDuration' XSD_ERROR = XSD_TEMPLATE % 'error' - def get_qname(uri, name): """ Returns an expanded QName from URI and local part. If any argument has boolean value diff --git a/xmlschema/tests/validators/test_complex_types.py b/xmlschema/tests/validators/test_complex_types.py index 6f65c3b..263f02c 100644 --- a/xmlschema/tests/validators/test_complex_types.py +++ b/xmlschema/tests/validators/test_complex_types.py @@ -276,7 +276,7 @@ class TestXsdComplexType(XsdValidatorTestCase): def test_upa_violation_with_wildcard(self): self.check_schema(""" + targetNamespace="tns" xmlns:ns="tns" elementFormDefault="unqualified"> @@ -295,7 +295,7 @@ class TestXsdComplexType(XsdValidatorTestCase): - + """, XMLSchemaModelError if self.schema_class.XSD_VERSION == '1.0' else None) diff --git a/xmlschema/tests/validators/test_wildcards.py b/xmlschema/tests/validators/test_wildcards.py index 3af1516..e8ebce5 100644 --- a/xmlschema/tests/validators/test_wildcards.py +++ b/xmlschema/tests/validators/test_wildcards.py @@ -247,7 +247,7 @@ class TestXsd11Wildcards(TestXsdWildcards): - + diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index e47a1ec..65a2086 100644 --- a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -202,7 +202,7 @@ class 
XMLSchemaValidationError(XMLSchemaValidatorError, ValueError): if not isinstance(obj, string_base_type): _obj = obj else: - _obj = obj.encode('ascii', 'xmlcharrefreplace').decode('utf-8') + _obj = obj.encode('ascii', 'xmlcharrefreplace').decode('utf-8') super(XMLSchemaValidationError, self).__init__( validator=validator, diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index aa8e23b..ade601b 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -250,8 +250,10 @@ class XsdWildcard(XsdComponent, ValidationMixin): if not self.not_qname: self.not_qname = other.not_qname[:] else: - self.not_qname = [x for x in self.not_qname if x in other.not_qname or - not other.is_namespace_allowed(get_namespace(x))] + self.not_qname = [ + x for x in self.not_qname + if x in other.not_qname or not other.is_namespace_allowed(get_namespace(x)) + ] if self.not_namespace: if other.not_namespace: From 22fdcc9a5afb0cce937d24b4152b181b8491cfe3 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 12 Oct 2019 11:23:31 +0200 Subject: [PATCH 34/36] Fix etree_to_string to avoid registering reserved prefixes --- xmlschema/etree.py | 8 +++++--- xmlschema/validators/exceptions.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/xmlschema/etree.py b/xmlschema/etree.py index b235f48..7c4d28f 100644 --- a/xmlschema/etree.py +++ b/xmlschema/etree.py @@ -153,19 +153,21 @@ def etree_tostring(elem, namespaces=None, indent='', max_lines=None, spaces_for_ if isinstance(elem, etree_element): if namespaces: for prefix, uri in namespaces.items(): - etree_register_namespace(prefix, uri) + if not re.match(r'ns\d+$', prefix): + etree_register_namespace(prefix, uri) tostring = ElementTree.tostring elif isinstance(elem, py_etree_element): if namespaces: for prefix, uri in namespaces.items(): - PyElementTree.register_namespace(prefix, uri) + if not re.match(r'ns\d+$', prefix): + PyElementTree.register_namespace(prefix, uri) 
tostring = PyElementTree.tostring elif lxml_etree is not None: if namespaces: for prefix, uri in namespaces.items(): - if prefix: + if prefix and not re.match(r'ns\d+$', prefix): lxml_etree_register_namespace(prefix, uri) tostring = lxml_etree.tostring else: diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index 65a2086..a7c6ea9 100644 --- a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -224,7 +224,7 @@ class XMLSchemaValidationError(XMLSchemaValidatorError, ValueError): msg.append('Reason: %s\n' % self.reason) if hasattr(self.validator, 'tostring'): msg.append("Schema:\n\n%s\n" % self.validator.tostring(' ', 20)) - if self.elem is not None: + if is_etree_element(self.elem): elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20) if hasattr(self.elem, 'sourceline'): msg.append("Instance (line %r):\n\n%s\n" % (self.elem.sourceline, elem_as_string)) From de7e2343bd3da73652266de995b64715dee9018f Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 12 Oct 2019 21:19:01 +0200 Subject: [PATCH 35/36] Implement element substitution and xsi:type block in instances --- xmlschema/validators/complex_types.py | 37 ++++++++------------------- xmlschema/validators/elements.py | 16 +++++++++--- xmlschema/validators/groups.py | 17 ++++++++++-- xmlschema/validators/xsdbase.py | 24 +++++++++-------- 4 files changed, 50 insertions(+), 44 deletions(-) diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 1a7fe2b..e45ff30 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -52,10 +52,10 @@ class XsdComplexType(XsdType, ValidationMixin): mixed = False assertions = () open_content = None + _block = None _ADMITTED_TAGS = {XSD_COMPLEX_TYPE, XSD_RESTRICTION} _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE} - _block = None @staticmethod def normalize(text): @@ -149,19 +149,10 @@ class 
XsdComplexType(XsdType, ValidationMixin): return self.base_type = base_type = self._parse_base_type(derivation_elem) - - block = base_type.block - if self._block is None and block: - self._block = block - if derivation_elem.tag == XSD_RESTRICTION: self._parse_simple_content_restriction(derivation_elem, base_type) - if base_type.blocked or 'restriction' in block and base_type != self: - self.blocked = True else: self._parse_simple_content_extension(derivation_elem, base_type) - if base_type.blocked or 'extension' in block and base_type != self: - self.blocked = True if content_elem is not elem[-1]: k = 2 if content_elem is not elem[0] else 1 @@ -184,24 +175,15 @@ class XsdComplexType(XsdType, ValidationMixin): return base_type = self._parse_base_type(derivation_elem, complex_content=True) - if base_type is not self: self.base_type = base_type elif self.redefine: self.base_type = self.redefine - block = base_type.block - if self._block is None and block: - self._block = block - if derivation_elem.tag == XSD_RESTRICTION: self._parse_complex_content_restriction(derivation_elem, base_type) - if base_type.blocked or 'restriction' in block and base_type != self: - self.blocked = True else: self._parse_complex_content_extension(derivation_elem, base_type) - if base_type.blocked or 'extension' in block and base_type != self: - self.blocked = True if content_elem is not elem[-1]: k = 2 if content_elem is not elem[0] else 1 @@ -450,6 +432,10 @@ class XsdComplexType(XsdType, ValidationMixin): self._parse_content_tail(elem, derivation='extension', base_attributes=base_type.attributes) + @property + def block(self): + return self.schema.block_default if self._block is None else self._block + @property def built(self): return self.content_type.parent is not None or self.content_type.built @@ -458,10 +444,6 @@ class XsdComplexType(XsdType, ValidationMixin): def validation_attempted(self): return 'full' if self.built else self.content_type.validation_attempted - @property - def 
block(self): - return self.schema.block_default if self._block is None else self._block - @staticmethod def is_simple(): return False @@ -514,14 +496,15 @@ class XsdComplexType(XsdType, ValidationMixin): self.base_type.is_valid(source, use_defaults, namespaces) def is_derived(self, other, derivation=None): + if derivation and derivation == self.derivation: + derivation = None # derivation mode checked + if self is other: - return True - elif derivation and self.derivation and derivation != self.derivation and other.is_complex(): - return False + return derivation is None elif other.name == XSD_ANY_TYPE: return True elif self.base_type is other: - return True + return derivation is None or self.base_type.derivation == derivation elif hasattr(other, 'member_types'): return any(self.is_derived(m, derivation) for m in other.member_types) elif self.base_type is None: diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 5a39819..a5fdc3f 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -358,11 +358,19 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) @property def final(self): - return self._final or self.schema.final_default if self.ref is None else self.ref.final + if self.ref is not None: + return self.ref.final + elif self._final is not None: + return self._final + return self.schema.final_default @property def block(self): - return self._block or self.schema.block_default if self.ref is None else self.ref.block + if self.ref is not None: + return self.ref.block + elif self._block is not None: + return self._block + return self.schema.block_default @property def nillable(self): @@ -479,8 +487,8 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) except (KeyError, TypeError) as err: yield self.validation_error(validation, err, elem, **kwargs) - if xsd_type.is_blocked(self.block): - yield self.validation_error(validation, "usage of 
%r is blocked" % xsd_type, elem, **kwargs) + if xsd_type.is_blocked(self): + yield self.validation_error(validation, "usage of %r is blocked" % xsd_type, elem, **kwargs) # Decode attributes attribute_group = self.get_attributes(xsd_type) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index ed27409..57dcb60 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -481,6 +481,13 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): return other_max_occurs >= max_occurs * self.max_occurs def check_dynamic_context(self, elem, xsd_element, model_element, converter): + if model_element is not xsd_element: + if 'substitution' in model_element.block \ + or xsd_element.type.is_blocked(model_element): + raise XMLSchemaValidationError( + model_element, "substitution of %r is blocked" % model_element + ) + alternatives = () if isinstance(xsd_element, XsdAnyElement): if xsd_element.process_contents == 'skip': @@ -707,8 +714,10 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): content = model.iter_unordered_content(element_data.content) elif converter.losslessly: content = element_data.content - else: + elif isinstance(element_data.content, list): content = model.iter_collapsed_content(element_data.content) + else: + content = [] for index, (name, value) in enumerate(content): if isinstance(name, int): @@ -775,7 +784,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n') - if validation != 'skip' and errors: + if validation != 'skip' and (errors or not content): attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()} if validation == 'lax' and converter.etree_element_class is not etree_element: child_tags = [converter.etree_element(e.tag, attrib=e.attrib) for e in children] @@ -783,6 +792,10 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: elem = 
converter.etree_element(element_data.tag, text, children, attrib) + if not content: + reason = "wrong content type {!r}".format(type(element_data.content)) + yield self.validation_error(validation, reason, elem, **kwargs) + for index, particle, occurs, expected in errors: yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs) diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index 152f0ee..13393ee 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -569,8 +569,7 @@ class XsdType(XsdComponent): """Common base class for XSD types.""" abstract = False - blocked = False - block = '' + block = None base_type = None derivation = None redefine = None @@ -664,17 +663,20 @@ class XsdType(XsdComponent): def is_derived(self, other, derivation=None): raise NotImplementedError - def is_blocked(self, block=''): - if self.blocked: - return True - elif not block: + def is_blocked(self, xsd_element): + """ + Returns `True` if the base type derivation is blocked, `False` otherwise. + """ + xsd_type = xsd_element.type + if self is xsd_type: return False - elif self.derivation and self.derivation in block: - return True - elif self.base_type is None: + + block = ('%s %s' % (xsd_element.block, xsd_type.block)).strip() + if not block: return False - else: - return self.base_type.is_blocked(block) + block = {x for x in block.split() if x in ('extension', 'restriction')} + + return any(self.is_derived(xsd_type, derivation) for derivation in block) def is_dynamic_consistent(self, other): return self.is_derived(other) or hasattr(other, 'member_types') and \ From 249e555659363aea9061b6823b37c6fc67f04a96 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sun, 13 Oct 2019 11:01:05 +0200 Subject: [PATCH 36/36] Improve content model encoding - Update iter_collapsed_content() to perform a model conforming reordering. 
--- CHANGELOG.rst | 7 +- publiccode.yml | 2 +- .../tests/test_factory/validation_tests.py | 6 +- xmlschema/tests/test_models.py | 156 ++++++++++++++++++ xmlschema/tests/validation/test_encoding.py | 4 +- xmlschema/validators/exceptions.py | 6 +- xmlschema/validators/groups.py | 6 +- xmlschema/validators/models.py | 46 ++++-- xmlschema/validators/schema.py | 4 +- 9 files changed, 210 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f1417d1..213513e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,12 +2,13 @@ CHANGELOG ********* -`v1.0.15`_ (2019-10-11) +`v1.0.15`_ (2019-10-13) ======================= * Improved XPath 2.0 bindings -* Added logging for schema initialization and building +* Added logging for schema initialization and building (handled with argument *loglevel*) +* Update encoding of collapsed contents with a new model based reordering method * Removed XLink namespace from meta-schema (loaded from a fallback location like XHTML) -* Fixed half of failed W3C instance tests (remain 266 over 15344 tests) +* Fixed half of failed W3C instance tests (remain 255 over 15344 tests) `v1.0.14`_ (2019-08-27) ======================= diff --git a/publiccode.yml b/publiccode.yml index ce4e5e4..bfe5e7b 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,7 +6,7 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2019-10-11' +releaseDate: '2019-10-13' softwareVersion: v1.0.15 developmentStatus: stable platforms: diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py index 3374cbd..dfd2d50 100644 --- a/xmlschema/tests/test_factory/validation_tests.py +++ b/xmlschema/tests/test_factory/validation_tests.py @@ -98,7 +98,11 @@ def make_validator_test_class(test_file, test_args, test_num, schema_class, chec for _ in iter_nested_items(data1, dict_class=ordered_dict_class): 
pass - elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs) + try: + elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs) + except XMLSchemaValidationError as err: + raise AssertionError(str(err) + msg_tmpl % "error during re-encoding") + if isinstance(elem1, tuple): # When validation='lax' if converter is not ParkerConverter: diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py index 60618e8..3748ead 100644 --- a/xmlschema/tests/test_models.py +++ b/xmlschema/tests/test_models.py @@ -580,6 +580,7 @@ class TestModelValidation11(TestModelValidation): class TestModelBasedSorting(XsdValidatorTestCase): def test_sort_content(self): + # test of ModelVisitor's sort_content/iter_unordered_content schema = self.get_schema(""" @@ -641,6 +642,161 @@ class TestModelBasedSorting(XsdValidatorTestCase): model.sort_content([('B3', True), ('B2', 10)]), [('B2', 10), ('B3', True)] ) + def test_iter_collapsed_content_with_optional_elements(self): + schema = self.get_schema(""" + + + + + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [('B3', 10), ('B4', None), ('B5', True), ('B6', 'alpha'), ('B7', 20)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + content = [('B3', 10), ('B5', True), ('B6', 'alpha'), ('B7', 20)] # Missing B4 + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + def test_iter_collapsed_content_with_repeated_elements(self): + schema = self.get_schema(""" + + + + + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [ + ('B3', 10), ('B4', None), ('B5', True), ('B5', False), ('B6', 'alpha'), ('B7', 20) + ] + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + content = [('B3', 10), ('B3', 11), ('B3', 12), ('B4', None), ('B5', True), + ('B5', False), ('B6', 
'alpha'), ('B7', 20), ('B7', 30)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + content = [('B3', 10), ('B3', 11), ('B3', 12), ('B4', None), ('B5', True), ('B5', False)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + def test_iter_collapsed_content_with_repeated_groups(self): + schema = self.get_schema(""" + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4)] + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 1), ('B2', 3), ('B1', 2), ('B2', 4)] + ) + + # Model broken by unknown element at start + content = [('X', None), ('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 1), ('X', None), ('B1', 2), ('B2', 3), ('B2', 4)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 1), ('B1', 2), ('X', None), ('B2', 3), ('B2', 4)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 1), ('B1', 2), ('B2', 3), ('X', None), ('B2', 4)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 1), ('B2', 3), ('B1', 2), ('X', None), ('B2', 4)] + ) + + content = [('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4), ('X', None)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 1), ('B2', 3), ('B1', 2), ('B2', 4), ('X', None)] + ) + + def test_iter_collapsed_content_with_single_elements(self): + schema = self.get_schema(""" + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [('B1', 'abc'), ('B2', 10), ('B3', False)] + model.restart() + 
self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B3', False), ('B1', 'abc'), ('B2', 10)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 'abc'), ('B3', False), ('B2', 10)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 'abc'), ('B1', 'def'), ('B2', 10), ('B3', False)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 'abc'), ('B2', 10), ('B3', False), ('B1', 'def')] + ) + + content = [('B1', 'abc'), ('B2', 10), ('X', None)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('X', None), ('B1', 'abc'), ('B2', 10), ('B3', False)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + if __name__ == '__main__': from xmlschema.tests import print_test_header diff --git a/xmlschema/tests/validation/test_encoding.py b/xmlschema/tests/validation/test_encoding.py index 30a90d5..ffa6623 100644 --- a/xmlschema/tests/validation/test_encoding.py +++ b/xmlschema/tests/validation/test_encoding.py @@ -374,8 +374,8 @@ class TestEncoding(XsdValidatorTestCase): """) - with self.assertRaises(XMLSchemaChildrenValidationError): - schema.to_etree({"A": [1, 2], "B": [3, 4]}) + root = schema.to_etree(ordered_dict_class([('A', [1, 2]), ('B', [3, 4])])) + self.assertListEqual([e.text for e in root], ['1', '3', '2', '4']) root = schema.to_etree({"A": [1, 2], "B": [3, 4]}, converter=UnorderedConverter) self.assertListEqual([e.text for e in root], ['1', '3', '2', '4']) diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index a7c6ea9..3ed988f 100644 --- a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -225,7 +225,11 @@ class XMLSchemaValidationError(XMLSchemaValidatorError, ValueError): if 
hasattr(self.validator, 'tostring'): msg.append("Schema:\n\n%s\n" % self.validator.tostring(' ', 20)) if is_etree_element(self.elem): - elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20) + try: + elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20) + except (ValueError, TypeError): + elem_as_string = repr(self.elem) + if hasattr(self.elem, 'sourceline'): msg.append("Instance (line %r):\n\n%s\n" % (self.elem.sourceline, elem_as_string)) else: diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 57dcb60..e5345b1 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -712,12 +712,12 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): cdata_index = 0 if isinstance(element_data.content, dict) or kwargs.get('unordered'): content = model.iter_unordered_content(element_data.content) + elif not isinstance(element_data.content, list): + content = [] elif converter.losslessly: content = element_data.content - elif isinstance(element_data.content, list): - content = model.iter_collapsed_content(element_data.content) else: - content = [] + content = ModelVisitor(self).iter_collapsed_content(element_data.content) for index, (name, value) in enumerate(content): if isinstance(name, int): diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py index e09ea7b..7a904f4 100644 --- a/xmlschema/validators/models.py +++ b/xmlschema/validators/models.py @@ -607,26 +607,42 @@ class ModelVisitor(MutableSequence): """ prev_name = None unordered_content = defaultdict(deque) + for name, value in content: if isinstance(name, int) or self.element is None: yield name, value - elif prev_name != name: + continue + + while self.element is not None: + if self.element.is_matching(name): + yield name, value + prev_name = name + for _ in self.advance(True): + pass + break + + for key in unordered_content: + if self.element.is_matching(key): + break + else: + if prev_name == name: + 
unordered_content[name].append(value) + break + + for _ in self.advance(False): + pass + continue + + try: + yield key, unordered_content[key].popleft() + except IndexError: + del unordered_content[key] + else: + for _ in self.advance(True): + pass + else: yield name, value prev_name = name - elif self.element.is_matching(name): - yield name, value - else: - unordered_content[name].append(value) - while self.element is not None and unordered_content: - for key in unordered_content: - if self.element.is_matching(key): - try: - yield name, unordered_content[key].popleft() - except IndexError: - del unordered_content[key] - break - else: - break # Add the remaining consumable content onto the end of the data. for name, values in unordered_content.items(): diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index 2f4791d..321809f 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -178,7 +178,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): namespace has been overridden by an import. Ignored if the argument *global_maps* is provided. :type use_meta: bool :param loglevel: for setting a different logging level for schema initialization \ - and building. For default is WARNING (30). + and building. For default is WARNING (30). For INFO level set it with 20, for \ + DEBUG level with 10. The default loglevel is restored after schema building, \ + when exiting the initialization method. :type loglevel: int :cvar XSD_VERSION: store the XSD version (1.0 or 1.1).