diff --git a/.gitignore b/.gitignore index 3c1ce44..710bdda 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,8 @@ *.json .idea/ .tox/ -.coverage +.coverage* +!.coveragerc .ipynb_checkpoints/ doc/_*/ dist/ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bba60c5..213513e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,20 @@ CHANGELOG ********* +`v1.0.15`_ (2019-10-13) +======================= +* Improved XPath 2.0 bindings +* Added logging for schema initialization and building (handled with argument *loglevel*) +* Update encoding of collapsed contents with a new model based reordering method +* Removed XLink namespace from meta-schema (loaded from a fallback location like XHTML) +* Fixed half of failed W3C instance tests (remain 255 over 15344 tests) + +`v1.0.14`_ (2019-08-27) +======================= +* Added XSD 1.1 validator with class *XMLSchema11* +* Memory usage optimization with lazy build of the XSD 1.0 and 1.1 meta-schemas +* Added facilities for the encoding of unordered and collapsed content + `v1.0.13`_ (2019-06-19) ======================= * Fix path normalization and tests for Windows platform @@ -249,3 +263,5 @@ v0.9.6 (2017-05-05) .. _v1.0.10: https://github.com/brunato/xmlschema/compare/v1.0.9...v1.0.10 .. _v1.0.11: https://github.com/brunato/xmlschema/compare/v1.0.10...v1.0.11 .. _v1.0.13: https://github.com/brunato/xmlschema/compare/v1.0.11...v1.0.13 +.. _v1.0.14: https://github.com/brunato/xmlschema/compare/v1.0.13...v1.0.14 +.. 
_v1.0.15: https://github.com/brunato/xmlschema/compare/v1.0.14...v1.0.15 diff --git a/README.rst b/README.rst index e626ecb..ad879de 100644 --- a/README.rst +++ b/README.rst @@ -26,16 +26,23 @@ Features This library includes the following features: -* Full XSD 1.0 support +* Full XSD 1.0 and XSD 1.1 support * Building of XML schema objects from XSD files * Validation of XML instances against XSD schemas * Decoding of XML data into Python data and to JSON * Encoding of Python data and JSON to XML * Data decoding and encoding ruled by converter classes * An XPath based API for finding schema's elements and attributes -* Support of XSD validation modes +* Support of XSD validation modes *strict*/*lax*/*skip* * Remote attacks protection by default using an XMLParser that forbids entities +.. note:: + Currently the XSD 1.1 validator is provided by class `XMLSchema11` and + the default `XMLSchema` class is still an alias of the XSD 1.0 validator, + the class `XMLSchema10`. From version 1.1 of the package the default + validator will be linked to the XSD 1.1 validator, a version that will also + remove support for Python 2.7. + Installation ============ @@ -63,6 +70,11 @@ the file containing the schema as argument: >>> import xmlschema >>> my_schema = xmlschema.XMLSchema('xmlschema/tests/cases/examples/vehicles/vehicles.xsd') +.. note:: + For XSD 1.1 schemas use the class `XMLSchema11`, because the default class + `XMLSchema` is still an alias of the XSD 1.0 validator class `XMLSchema10`. + From the next minor release (v1.1) the default class will become `XMLSchema11`. + The schema can be used to validate XML documents: .. code-block:: pycon @@ -126,14 +138,9 @@ values that match to the data types declared by the schema: 'title': None, 'year': '1925'}]} -Roadmap -======= - -* XSD 1.1 Authors ======= - Davide Brunato and others who have contributed with code or with sample cases. 
License diff --git a/doc/api.rst b/doc/api.rst index 0138d44..9e57b7c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -18,9 +18,10 @@ Schema level API ---------------- .. class:: xmlschema.XMLSchema10 +.. class:: xmlschema.XMLSchema11 - The class for XSD v1.0 schema instances. It's generated by the meta-class :class:`XMLSchemaMeta` - and takes the same API of :class:`XMLSchemaBase`. + The classes for XSD v1.0 and v1.1 schema instances. They are both generated by the + meta-class :class:`XMLSchemaMeta` and take the same API of :class:`XMLSchemaBase`. .. autoclass:: xmlschema.XMLSchema @@ -56,6 +57,7 @@ Schema level API .. automethod:: check_schema .. automethod:: build + .. automethod:: clear .. autoattribute:: built .. autoattribute:: validation_attempted .. autoattribute:: validity @@ -76,26 +78,14 @@ Schema level API .. automethod:: iter_encode -ElementTree and XPath API -------------------------- -.. autoclass:: xmlschema.ElementPathMixin - - .. autoattribute:: tag - .. autoattribute:: attrib - .. automethod:: get - .. automethod:: iter - .. automethod:: iterchildren - .. automethod:: find - .. automethod:: findall - .. automethod:: iterfind - - -XSD globals maps API --------------------- +XSD global maps API +------------------- .. autoclass:: xmlschema.XsdGlobals - :members: copy, register, iter_schemas, iter_globals, clear, build + :members: copy, register, iter_schemas, iter_globals, lookup_notation, lookup_type, + lookup_attribute, lookup_attribute_group, lookup_group, lookup_element, lookup, + clear, build, unbuilt, check .. _xml-schema-converters-api: @@ -111,6 +101,7 @@ to JSON data `_. .. autoclass:: xmlschema.XMLSchemaConverter + .. autoattribute:: lossy .. autoattribute:: lossless .. autoattribute:: losslessly @@ -121,6 +112,9 @@ to JSON data `_. .. automethod:: element_decode .. automethod:: element_encode + .. automethod:: map_qname + .. automethod:: unmap_qname + .. autoclass:: xmlschema.UnorderedConverter .. 
autoclass:: xmlschema.ParkerConverter @@ -170,6 +164,123 @@ Resource access API .. autofunction:: xmlschema.normalize_url +XSD components API +------------------ + +.. note:: + For XSD components only methods included in the following documentation are considered + part of the stable API, the others are considered internals that can be changed without + forewarning. + +XSD elements +^^^^^^^^^^^^ +.. autoclass:: xmlschema.validators.Xsd11Element +.. autoclass:: xmlschema.validators.XsdElement + +XSD attributes +^^^^^^^^^^^^^^ +.. autoclass:: xmlschema.validators.Xsd11Attribute +.. autoclass:: xmlschema.validators.XsdAttribute + +XSD types +^^^^^^^^^ +.. autoclass:: xmlschema.validators.XsdType + :members: is_simple, is_complex, is_atomic, is_empty, is_emptiable, has_simple_content, + has_mixed_content, is_element_only +.. autoclass:: xmlschema.validators.Xsd11ComplexType +.. autoclass:: xmlschema.validators.XsdComplexType +.. autoclass:: xmlschema.validators.XsdSimpleType +.. autoclass:: xmlschema.validators.XsdAtomicBuiltin +.. autoclass:: xmlschema.validators.XsdList +.. autoclass:: xmlschema.validators.Xsd11Union +.. autoclass:: xmlschema.validators.XsdUnion +.. autoclass:: xmlschema.validators.Xsd11AtomicRestriction +.. autoclass:: xmlschema.validators.XsdAtomicRestriction + +Attribute and model groups +^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. autoclass:: xmlschema.validators.XsdAttributeGroup +.. autoclass:: xmlschema.validators.Xsd11Group +.. autoclass:: xmlschema.validators.XsdGroup + +Wildcards +^^^^^^^^^ +.. autoclass:: xmlschema.validators.Xsd11AnyElement +.. autoclass:: xmlschema.validators.XsdAnyElement +.. autoclass:: xmlschema.validators.Xsd11AnyAttribute +.. autoclass:: xmlschema.validators.XsdAnyAttribute +.. autoclass:: xmlschema.validators.XsdOpenContent +.. autoclass:: xmlschema.validators.XsdDefaultOpenContent + +Identity constraints +^^^^^^^^^^^^^^^^^^^^ +.. autoclass:: xmlschema.validators.XsdIdentity +.. autoclass:: xmlschema.validators.XsdSelector +.. 
autoclass:: xmlschema.validators.XsdFieldSelector +.. autoclass:: xmlschema.validators.Xsd11Unique +.. autoclass:: xmlschema.validators.XsdUnique +.. autoclass:: xmlschema.validators.Xsd11Key +.. autoclass:: xmlschema.validators.XsdKey +.. autoclass:: xmlschema.validators.Xsd11Keyref +.. autoclass:: xmlschema.validators.XsdKeyref + +Facets +^^^^^^ +.. autoclass:: xmlschema.validators.XsdFacet +.. autoclass:: xmlschema.validators.XsdWhiteSpaceFacet +.. autoclass:: xmlschema.validators.XsdLengthFacet +.. autoclass:: xmlschema.validators.XsdMinLengthFacet +.. autoclass:: xmlschema.validators.XsdMaxLengthFacet +.. autoclass:: xmlschema.validators.XsdMinInclusiveFacet +.. autoclass:: xmlschema.validators.XsdMinExclusiveFacet +.. autoclass:: xmlschema.validators.XsdMaxInclusiveFacet +.. autoclass:: xmlschema.validators.XsdMaxExclusiveFacet +.. autoclass:: xmlschema.validators.XsdTotalDigitsFacet +.. autoclass:: xmlschema.validators.XsdFractionDigitsFacet +.. autoclass:: xmlschema.validators.XsdExplicitTimezoneFacet +.. autoclass:: xmlschema.validators.XsdAssertionFacet +.. autoclass:: xmlschema.validators.XsdEnumerationFacets +.. autoclass:: xmlschema.validators.XsdPatternFacets + +Other XSD components +^^^^^^^^^^^^^^^^^^^^ +.. autoclass:: xmlschema.validators.XsdAssert +.. autoclass:: xmlschema.validators.XsdAlternative +.. autoclass:: xmlschema.validators.XsdNotation +.. autoclass:: xmlschema.validators.XsdAnnotation + +XSD Validation API +^^^^^^^^^^^^^^^^^^ +This API is implemented for XSD schemas, elements, attributes, types, attribute +groups and model groups. + +.. autoclass:: xmlschema.validators.ValidationMixin + + .. automethod:: is_valid + .. automethod:: validate + .. automethod:: decode + .. automethod:: iter_decode + .. automethod:: iter_encode + .. automethod:: iter_errors + .. automethod:: encode + .. 
automethod:: iter_encode + +ElementTree and XPath API +^^^^^^^^^^^^^^^^^^^^^^^^^ +This API is implemented for XSD schemas, elements and complexType's assertions. + +.. autoclass:: xmlschema.ElementPathMixin + + .. autoattribute:: tag + .. autoattribute:: attrib + .. automethod:: get + .. automethod:: iter + .. automethod:: iterchildren + .. automethod:: find + .. automethod:: findall + .. automethod:: iterfind + + .. _errors-and-exceptions: Errors and exceptions @@ -190,3 +301,4 @@ Errors and exceptions .. autoexception:: xmlschema.XMLSchemaIncludeWarning .. autoexception:: xmlschema.XMLSchemaImportWarning +.. autoexception:: xmlschema.XMLSchemaTypeTableWarning diff --git a/doc/conf.py b/doc/conf.py index cd13579..c0eed6c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ author = 'Davide Brunato' # The short X.Y version. version = '1.0' # The full version, including alpha/beta/rc tags. -release = '1.0.13' +release = '1.0.15' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/notes.rst b/doc/notes.rst index 077a8f6..4d88eee 100644 --- a/doc/notes.rst +++ b/doc/notes.rst @@ -11,8 +11,3 @@ Support The project is hosted on GitHub, refer to the `xmlschema's project page `_ for source code and for an issue tracker. - -Roadmap -------- - -* XSD 1.1 \ No newline at end of file diff --git a/doc/testing.rst b/doc/testing.rst index 5a380b5..b24ef1b 100644 --- a/doc/testing.rst +++ b/doc/testing.rst @@ -27,7 +27,7 @@ subdirectory. 
There are several test scripts, each one for a different topic: Tests about XML/XSD resources access **test_schemas.py** - Tests about parsing of XSD Schemas + Tests about parsing of XSD schemas and components **test_validators.py** Tests regarding XML data validation/decoding/encoding @@ -142,15 +142,35 @@ Testing with the W3C XML Schema 1.1 test suite ---------------------------------------------- From release v1.0.11, using the script *test_w3c_suite.py*, you can run also tests based on the -`W3C XML Schema 1.1 test suite <https://github.com/w3c/xsdtests>`_. To run these tests, currently -limited to XSD 1.0 schema tests, clone the W3C repo on the project's parent directory and than -run the script: +`W3C XML Schema 1.1 test suite <https://github.com/w3c/xsdtests>`_. To run these tests clone the +W3C repo on the project's parent directory and then run the script: .. code-block:: text git clone https://github.com/w3c/xsdtests.git python xmlschema/xmlschema/tests/test_w3c_suite.py +You can also provide additional options to select a different set of tests: + +**--xml** + Add tests for instances, skipped by default. + +**--xsd10** + Run only XSD 1.0 tests. + +**--xsd11** + Run only XSD 1.1 tests. + +**--valid** + Run only tests marked as *valid*. + +**--invalid** + Run only tests marked as *invalid*. + +**[NUM [NUM ...]]** + Run only the cases that match a list of progressive numbers, associated + to the test classes by the script. + Testing other schemas and instances ----------------------------------- diff --git a/doc/usage.rst b/doc/usage.rst index 1ade3d1..fda3cde 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -20,8 +20,8 @@ Import the library in your code with:: import xmlschema -The module initialization builds the XSD meta-schemas and of the dictionary -containing the code points of the Unicode categories. +The module initialization builds the dictionary containing the code points of +the Unicode categories. 
Create a schema instance @@ -103,21 +103,21 @@ The global maps can be accessed through :attr:`XMLSchema.maps` attribute: >>> from pprint import pprint >>> pprint(sorted(schema.maps.types.keys())[:5]) ['{http://example.com/vehicles}vehicleType', - '{http://www.w3.org/1999/xlink}actuateType', - '{http://www.w3.org/1999/xlink}arcType', - '{http://www.w3.org/1999/xlink}arcroleType', - '{http://www.w3.org/1999/xlink}extended'] + '{http://www.w3.org/2001/XMLSchema}ENTITIES', + '{http://www.w3.org/2001/XMLSchema}ENTITY', + '{http://www.w3.org/2001/XMLSchema}ID', + '{http://www.w3.org/2001/XMLSchema}IDREF'] >>> pprint(sorted(schema.maps.elements.keys())[:10]) ['{http://example.com/vehicles}bikes', '{http://example.com/vehicles}cars', '{http://example.com/vehicles}vehicles', - '{http://www.w3.org/1999/xlink}arc', - '{http://www.w3.org/1999/xlink}locator', - '{http://www.w3.org/1999/xlink}resource', - '{http://www.w3.org/1999/xlink}title', '{http://www.w3.org/2001/XMLSchema}all', '{http://www.w3.org/2001/XMLSchema}annotation', - '{http://www.w3.org/2001/XMLSchema}any'] + '{http://www.w3.org/2001/XMLSchema}any', + '{http://www.w3.org/2001/XMLSchema}anyAttribute', + '{http://www.w3.org/2001/XMLSchema}appinfo', + '{http://www.w3.org/2001/XMLSchema}attribute', + '{http://www.w3.org/2001/XMLSchema}attributeGroup'] Schema objects include methods for finding XSD elements and attributes in the schema. Those are methods ot the ElementTree's API, so you can use an XPath expression for @@ -553,3 +553,11 @@ From release v1.0.12 the document validation and decoding API has an optional ar that can be changed to True for operating with a lazy :class:`XMLResource`. The lazy mode can be useful for validating and decoding big XML data files. This is still an experimental feature that will be refined and integrated in future versions. 
+ + +XSD 1.0 and 1.1 support +----------------------- +From release v1.0.14 XSD 1.1 support has been added to the library through the class +:class:`XMLSchema11`. You have to use this class for XSD 1.1 schemas instead of the default +class :class:`XMLSchema` that is still linked to the XSD 1.0 validator :class:`XMLSchema10`. +From the next minor release (v1.1) the default class will become :class:`XMLSchema11`. diff --git a/publiccode.yml b/publiccode.yml new file mode 100644 index 0000000..6ecb1e0 --- /dev/null +++ b/publiccode.yml @@ -0,0 +1,68 @@ +# This repository adheres to the publiccode.yml standard by including this +# metadata file that makes public software easily discoverable. +# More info at https://github.com/italia/publiccode.yml + +publiccodeYmlVersion: '0.2' +name: xmlschema +url: 'https://github.com/sissaschool/xmlschema' +landingURL: 'https://github.com/sissaschool/xmlschema' +releaseDate: '2019-10-13' +softwareVersion: v1.0.15 +developmentStatus: stable +platforms: + - linux + - windows + - mac +softwareType: library +inputTypes: + - text/xml + - application/xml + - application/json +outputTypes: + - application/json + - application/xml +categories: + - data-analytics + - data-collection +maintenance: + type: internal + contacts: + - name: Davide Brunato + email: davide.brunato@sissa.it + affiliation: ' Scuola Internazionale Superiore di Studi Avanzati' +legal: + license: MIT + mainCopyrightOwner: Scuola Internazionale Superiore di Studi Avanzati + repoOwner: Scuola Internazionale Superiore di Studi Avanzati +localisation: + localisationReady: false + availableLanguages: + - en +it: + countryExtensionVersion: '0.2' + riuso: + codiceIPA: sissa +description: + en: + genericName: xmlschema + apiDocumentation: 'https://xmlschema.readthedocs.io/en/latest/api.html' + documentation: 'http://xmlschema.readthedocs.io/en/latest/' + shortDescription: XML Schema validator and data conversion library for Python + longDescription: > + The _xmlschema_ library is an 
implementation of [XML + Schema](http://www.w3.org/2001/XMLSchema) for Python (supports Python 2.7 + and Python 3.5+). + + + This library arises from the needs of a solid Python layer for processing + XML Schema based files for [MaX (Materials design at the + Exascale)](http://www.max-centre.eu/) European project. A significant + problem is the encoding and the decoding of the XML data files produced by + different simulation software. Another important requirement is the XML + data validation, in order to put the produced data under control. The lack + of a suitable alternative for Python in the schema-based decoding of XML + data has led to build this library. Obviously this library can be useful + for other cases related to XML Schema based processing, not only for the + original scope. + features: + - XSD 1.0 and XSD 1.1 validator and decoder diff --git a/requirements-dev.txt b/requirements-dev.txt index 926cb6b..83dfcbd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ setuptools tox coverage -elementpath~=1.1.7 +elementpath~=1.3.0 lxml memory_profiler pathlib2 # For Py27 tests on resources diff --git a/setup.py b/setup.py index 979550c..603a73e 100755 --- a/setup.py +++ b/setup.py @@ -38,8 +38,8 @@ class InstallCommand(install): setup( name='xmlschema', - version='1.0.13', - install_requires=['elementpath~=1.1.7'], + version='1.0.15', + install_requires=['elementpath~=1.3.0'], packages=['xmlschema'], include_package_data=True, cmdclass={ diff --git a/tox.ini b/tox.ini index 2107676..2497e14 100644 --- a/tox.ini +++ b/tox.ini @@ -4,32 +4,34 @@ # and then run "tox" from this directory. 
[tox] -envlist = py27, py35, py36, py37, py38, docs, flake8, coverage +envlist = package, py27, py35, py36, py37, py38, memory, docs, flake8, coverage skip_missing_interpreters = true toxworkdir = {homedir}/.tox/xmlschema [testenv] deps = lxml - elementpath~=1.1.7 - py37: memory_profiler + elementpath~=1.3.0 + py27: pathlib2 + memory: memory_profiler docs: Sphinx docs: sphinx_rtd_theme flake8: flake8 coverage: coverage + coverage: memory_profiler commands = python xmlschema/tests/test_all.py {posargs} whitelist_externals = make -[testenv:py27] -deps = - lxml - elementpath~=1.1.7 - pathlib2 -commands = python xmlschema/tests/test_all.py {posargs} - [testenv:py38] -deps = elementpath~=1.1.7 -commands = python xmlschema/tests/test_all.py {posargs} +deps = + lxml==4.3.5 + elementpath~=1.3.0 + +[testenv:package] +commands = python xmlschema/tests/test_package.py + +[testenv:memory] +commands = python xmlschema/tests/test_memory.py [testenv:docs] commands = diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index 43442ef..e9adc67 100644 --- a/xmlschema/__init__.py +++ b/xmlschema/__init__.py @@ -8,25 +8,29 @@ # # @author Davide Brunato # -from .exceptions import XMLSchemaException, XMLSchemaRegexError, XMLSchemaURLError +from .exceptions import XMLSchemaException, XMLSchemaRegexError, XMLSchemaURLError, \ + XMLSchemaNamespaceError +from .etree import etree_tostring from .resources import ( normalize_url, fetch_resource, load_xml_resource, fetch_namespaces, fetch_schema_locations, fetch_schema, XMLResource ) from .xpath import ElementPathMixin from .converters import ( - ElementData, XMLSchemaConverter, ParkerConverter, BadgerFishConverter, AbderaConverter, JsonMLConverter + ElementData, XMLSchemaConverter, UnorderedConverter, ParkerConverter, + BadgerFishConverter, AbderaConverter, JsonMLConverter ) from .documents import validate, to_dict, to_json, from_json from .validators import ( - XMLSchemaValidatorError, XMLSchemaParseError, XMLSchemaNotBuiltError, 
XMLSchemaModelError, - XMLSchemaModelDepthError, XMLSchemaValidationError, XMLSchemaDecodeError, XMLSchemaEncodeError, - XMLSchemaChildrenValidationError, XMLSchemaIncludeWarning, XMLSchemaImportWarning, XsdGlobals, - XMLSchemaBase, XMLSchema, XMLSchema10 + XMLSchemaValidatorError, XMLSchemaParseError, XMLSchemaNotBuiltError, + XMLSchemaModelError, XMLSchemaModelDepthError, XMLSchemaValidationError, + XMLSchemaDecodeError, XMLSchemaEncodeError, XMLSchemaChildrenValidationError, + XMLSchemaIncludeWarning, XMLSchemaImportWarning, XMLSchemaTypeTableWarning, + XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11 ) -__version__ = '1.0.13' +__version__ = '1.0.15' __author__ = "Davide Brunato" __contact__ = "brunato@sissa.it" __copyright__ = "Copyright 2016-2019, SISSA" diff --git a/xmlschema/codepoints.py b/xmlschema/codepoints.py index 84dc04e..efbbc9e 100644 --- a/xmlschema/codepoints.py +++ b/xmlschema/codepoints.py @@ -194,7 +194,7 @@ def iterparse_character_group(s, expand_ranges=False): raise XMLSchemaRegexError("bad character %r at position %d" % (s[k], k)) escaped = on_range = False char = s[k] - if k >= length - 1 or s[k + 1] != '-': + if k >= length - 2 or s[k + 1] != '-': yield ord(char) elif s[k] == '\\': if escaped: @@ -209,7 +209,7 @@ def iterparse_character_group(s, expand_ranges=False): yield ord('\\') on_range = False char = s[k] - if k >= length - 1 or s[k + 1] != '-': + if k >= length - 2 or s[k + 1] != '-': yield ord(char) if escaped: yield ord('\\') @@ -678,3 +678,18 @@ if maxunicode == UCS4_MAXUNICODE: 'IsCJKCompatibilityIdeographsSupplement': UnicodeSubset('\U0002F800-\U0002FA1F'), 'IsTags': UnicodeSubset('\U000E0000-\U000E007F') }) + + +def unicode_subset(name, block_safe=False): + if name.startswith('Is'): + try: + return UNICODE_BLOCKS[name] + except KeyError: + if block_safe: + return UnicodeSubset.fromlist([0, maxunicode]) + raise XMLSchemaRegexError("%r doesn't match to any Unicode block." 
% name) + else: + try: + return UNICODE_CATEGORIES[name] + except KeyError: + raise XMLSchemaRegexError("%r doesn't match to any Unicode category." % name) diff --git a/xmlschema/converters.py b/xmlschema/converters.py index badc2ad..3570d47 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -11,13 +11,16 @@ This module contains converter classes and definitions. """ from __future__ import unicode_literals -from collections import namedtuple, OrderedDict +from collections import namedtuple +from types import MethodType import string +import warnings from .compat import ordered_dict_class, unicode_type from .exceptions import XMLSchemaValueError -from .etree import etree_element, lxml_etree_element, etree_register_namespace, lxml_etree_register_namespace from .namespaces import XSI_NAMESPACE +from .qnames import local_name +from .etree import etree_element, lxml_etree_element, etree_register_namespace, lxml_etree_register_namespace from xmlschema.namespaces import NamespaceMapper ElementData = namedtuple('ElementData', ['tag', 'text', 'content', 'attributes']) @@ -33,6 +36,7 @@ attributes. def raw_xml_encode(value): + """Encodes a simple value to XML.""" if isinstance(value, bool): return 'true' if value else 'false' elif isinstance(value, (list, tuple)): @@ -45,7 +49,11 @@ class XMLSchemaConverter(NamespaceMapper): """ Generic XML Schema based converter class. A converter is used to compose decoded XML data for an Element into a data structure and to build an Element - from encoded data structure. + from encoded data structure. There are two methods for interfacing the + converter with the decoding/encoding process. The method *element_decode* + accepts an ElementData instance, containing the element parts, and returns + a data structure. The method *element_encode* accepts a data structure + and returns an ElementData that can be used to build an Element. :param namespaces: map from namespace prefixes to URI. :param dict_class: dictionary class to use for decoded data. 
Default is `dict`. @@ -55,12 +63,12 @@ class XMLSchemaConverter(NamespaceMapper): :param text_key: is the key to apply to element's decoded text data. :param attr_prefix: controls the mapping of XML attributes, to the same name or \ with a prefix. If `None` the converter ignores attributes. - :param cdata_prefix: is used for including and prefixing the CDATA parts of a \ - mixed content, that are labeled with an integer instead of a string. \ - CDATA parts are ignored if this argument is `None`. + :param cdata_prefix: is used for including and prefixing the character data parts \ + of a mixed content, that are labeled with an integer instead of a string. \ + Character data parts are ignored if this argument is `None`. :param indent: number of spaces for XML indentation (default is 4). - :param strip_namespaces: remove namespace information from names during decoding \ - or encoding, defaults to `False`. + :param strip_namespaces: if set to `True` removes namespace declarations from data and \ + namespace information from names, during decoding or encoding. Defaults to `False`. :param preserve_root: if set to `True` the root element is preserved, wrapped into a \ single-item dictionary. Applicable only to default converter and to :class:`ParkerConverter`. :param force_dict: if set to `True` complex elements with simple content are decoded \ @@ -81,6 +89,25 @@ class XMLSchemaConverter(NamespaceMapper): :ivar force_dict: force dictionary for complex elements with simple content :ivar force_list: force list for child elements """ + # Deprecation from release v1.0.14 + def _unmap_attribute_qname(self, name): + warnings.warn("the _unmap_attribute_qname method is deprecated and will " + "be removed in 1.1 version. 
Use the unmap_qname() instead, " + "providing the attribute group of the XSD element for the " + "optional *name_table* argument.", + DeprecationWarning, stacklevel=2) + if name[0] == '{' or ':' not in name: + return name + else: + return self.unmap_qname(name) + + @property + def lossless(self): + """The negation of *lossy* property, preserved for backward compatibility.""" + warnings.warn("the lossless property will be removed in 1.1 version, " + "use 'not self.lossy' instead", DeprecationWarning, stacklevel=2) + return not self.lossy + def __init__(self, namespaces=None, dict_class=None, list_class=None, etree_element_class=None, text_key='$', attr_prefix='@', cdata_prefix=None, indent=4, strip_namespaces=False, preserve_root=False, force_dict=False, force_list=False, **kwargs): @@ -103,8 +130,6 @@ class XMLSchemaConverter(NamespaceMapper): super(XMLSchemaConverter, self).__init__(namespaces, etree_register_namespace) else: super(XMLSchemaConverter, self).__init__(namespaces, lxml_etree_register_namespace) - if strip_namespaces: - self.map_qname = self.unmap_qname = self._unmap_attribute_qname = self._local_name def __setattr__(self, name, value): if name in ('attr_prefix', 'text_key', 'cdata_prefix'): @@ -112,18 +137,27 @@ class XMLSchemaConverter(NamespaceMapper): raise XMLSchemaValueError('%r cannot includes letters or underscores: %r' % (name, value)) elif name == 'attr_prefix': self.ns_prefix = (value or '') + 'xmlns' + elif name == 'strip_namespaces': + if value: + self.map_qname = MethodType(local_name, self) + self.unmap_qname = MethodType(lambda x, y=None: local_name(x), self) + elif getattr(self, 'strip_namespaces', False): + # Rebuild instance methods only if necessary + self.map_qname = MethodType(XMLSchemaConverter.map_qname, self) + self.unmap_qname = MethodType(XMLSchemaConverter.unmap_qname, self) super(XMLSchemaConverter, self).__setattr__(name, value) @property - def lossless(self): - """The converter can ignore some kind of XML data during 
decoding.""" - return self.cdata_prefix and self.text_key and self.attr_prefix + def lossy(self): + """The converter ignores some kind of XML data during decoding/encoding.""" + return not self.cdata_prefix or not self.text_key or not self.attr_prefix @property def losslessly(self): """ - The format of decoded data is without loss of quality. Only losslessly formats can be - always used to encode to an XML data that is strictly conformant to the schema. + The XML data is decoded without loss of quality, neither on data nor on data model + shape. Only losslessly converters can be always used to encode to an XML data that + is strictly conformant to the schema. """ return False @@ -162,26 +196,6 @@ class XMLSchemaConverter(NamespaceMapper): for name, value in attributes: yield self.map_qname(name), value - def _unmap_attribute_qname(self, name): - if name[0] == '{' or ':' not in name: - return name - else: - return self.unmap_qname(name) - - @staticmethod - def _local_name(qname): - try: - if qname[0] == '{': - _, local_name = qname.split('}') - elif ':' in qname: - _, local_name = qname.split(':') - else: - return qname - except ValueError: - return qname - else: - return local_name - def map_content(self, content): """ A generator function for converting decoded content to a data structure. @@ -224,7 +238,8 @@ class XMLSchemaConverter(NamespaceMapper): elem = self.etree_element_class(tag, self.dict(attrib)) else: nsmap = {prefix if prefix else None: uri for prefix, uri in self._namespaces.items()} - elem = self.etree_element_class(tag, OrderedDict(attrib), nsmap) + elem = self.etree_element_class(tag, nsmap=nsmap) + elem.attrib.update(attrib) if children: elem.extend(children) @@ -246,7 +261,7 @@ class XMLSchemaConverter(NamespaceMapper): :return: a data structure containing the decoded data. 
""" result_dict = self.dict() - if level == 0 and xsd_element.is_global and self: + if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: schema_namespaces = set(xsd_element.namespaces.values()) result_dict.update( ('%s:%s' % (self.ns_prefix, k) if k else self.ns_prefix, v) for k, v in self.items() @@ -309,12 +324,10 @@ class XMLSchemaConverter(NamespaceMapper): if not isinstance(obj, (self.dict, dict)): if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): - return ElementData(tag, obj, None, self.dict()) + return ElementData(tag, obj, None, {}) else: - return ElementData(tag, None, obj, self.dict()) + return ElementData(tag, None, obj, {}) - unmap_qname = self.unmap_qname - unmap_attribute_qname = self._unmap_attribute_qname text_key = self.text_key attr_prefix = self.attr_prefix ns_prefix = self.ns_prefix @@ -322,9 +335,9 @@ class XMLSchemaConverter(NamespaceMapper): text = None content = [] - attributes = self.dict() + attributes = {} for name, value in obj.items(): - if text_key and name == text_key: + if text_key and name == self.text_key: text = obj[text_key] elif (cdata_prefix and name.startswith(cdata_prefix)) or \ name[0].isdigit() and cdata_prefix == '': @@ -333,32 +346,32 @@ class XMLSchemaConverter(NamespaceMapper): elif name == ns_prefix: self[''] = value elif name.startswith('%s:' % ns_prefix): - self[name[len(ns_prefix) + 1:]] = value + if not self.strip_namespaces: + self[name[len(ns_prefix) + 1:]] = value elif attr_prefix and name.startswith(attr_prefix): - name = name[len(attr_prefix):] - attributes[unmap_attribute_qname(name)] = value + attr_name = name[len(attr_prefix):] + ns_name = self.unmap_qname(attr_name, xsd_element.attributes) + attributes[ns_name] = value elif not isinstance(value, (self.list, list)) or not value: - content.append((unmap_qname(name), value)) + content.append((self.unmap_qname(name), value)) elif isinstance(value[0], (self.dict, dict, self.list, list)): - ns_name = 
unmap_qname(name) - for item in value: - content.append((ns_name, item)) + ns_name = self.unmap_qname(name) + content.extend((ns_name, item) for item in value) else: - ns_name = unmap_qname(name) + ns_name = self.unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.match(ns_name, self.get('')) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) else: - for item in value: - content.append((ns_name, item)) + content.extend((ns_name, item) for item in value) break else: if attr_prefix == '' and ns_name not in attributes: - for xsd_attribute in xsd_element.attributes.values(): + for key, xsd_attribute in xsd_element.attributes.items(): if xsd_attribute.is_matching(ns_name): - attributes[ns_name] = value + attributes[key] = value break else: content.append((ns_name, value)) @@ -370,51 +383,14 @@ class XMLSchemaConverter(NamespaceMapper): class UnorderedConverter(XMLSchemaConverter): """ - Same as :class:`XMLSchemaConverter` but :meth:`element_encode` is - modified so the order of the elements in the encoded output is based on - the model visitor pattern rather than the order in which the elements - were added to the input dictionary. As the order of the input - dictionary is not preserved, text between sibling elements will raise - an exception. - - eg. - - .. code-block:: python - - import xmlschema - from xmlschema.converters import UnorderedConverter - - xsd = \"\"\" - - - - - - - - - - \"\"\" - - schema = xmlschema.XMLSchema(xsd, converter=UnorderedConverter) - tree = schema.to_etree( - {"A": [1, 2], "B": [3, 4]}, - ) - # Returns equivalent of: - # - # 1 - # 3 - # 2 - # 4 - # - - Schemas which contain repeated sequences (``maxOccurs > 1``) of - optional elements may be ambiguous using this approach when some of the - optional elements are not present. 
In those cases, decoding and then - encoding may not reproduce the original ordering. + Same as :class:`XMLSchemaConverter` but :meth:`element_encode` returns + a dictionary for the content of the element, that can be used directly + for unordered encoding mode. In this mode the order of the elements in + the encoded output is based on the model visitor pattern rather than + the order in which the elements were added to the input dictionary. + As the order of the input dictionary is not preserved, character data + between sibling elements are interleaved between tags. """ - def element_encode(self, obj, xsd_element, level=0): """ Extracts XML decoded data from a data structure for encoding into an ElementTree. @@ -437,57 +413,56 @@ class UnorderedConverter(XMLSchemaConverter): if not isinstance(obj, (self.dict, dict)): if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): - return ElementData(tag, obj, None, self.dict()) + return ElementData(tag, obj, None, {}) else: - return ElementData(tag, None, obj, self.dict()) + return ElementData(tag, None, obj, {}) - unmap_qname = self.unmap_qname - unmap_attribute_qname = self._unmap_attribute_qname text_key = self.text_key attr_prefix = self.attr_prefix ns_prefix = self.ns_prefix cdata_prefix = self.cdata_prefix text = None - # `iter_encode` assumes that the values of this dict will all be lists - # where each item is the content of a single element. When building - # content_lu, content which is not a list or lists to be placed into a - # single element (element has a list content type) must be wrapped in a - # list to retain that structure. + attributes = {} + + # The unordered encoding mode assumes that the values of this dict will + # all be lists where each item is the content of a single element. When + # building content_lu, content which is not a list or lists to be placed + # into a single element (element has a list content type) must be wrapped + # in a list to retain that structure. 
Character data are not wrapped into + # lists because they because they are divided from the rest of the content + # into the unordered mode generator function of the ModelVisitor class. content_lu = {} - attributes = self.dict() + for name, value in obj.items(): if text_key and name == text_key: text = obj[text_key] elif (cdata_prefix and name.startswith(cdata_prefix)) or \ name[0].isdigit() and cdata_prefix == '': - raise XMLSchemaValueError( - "cdata segments are not compatible with the '{}' converter".format( - self.__class__.__name__ - ) - ) + index = int(name[len(cdata_prefix):]) + content_lu[index] = value elif name == ns_prefix: self[''] = value elif name.startswith('%s:' % ns_prefix): self[name[len(ns_prefix) + 1:]] = value elif attr_prefix and name.startswith(attr_prefix): - name = name[len(attr_prefix):] - attributes[unmap_attribute_qname(name)] = value + attr_name = name[len(attr_prefix):] + ns_name = self.unmap_qname(attr_name, xsd_element.attributes) + attributes[ns_name] = value elif not isinstance(value, (self.list, list)) or not value: - content_lu[unmap_qname(name)] = [value] + content_lu[self.unmap_qname(name)] = [value] elif isinstance(value[0], (self.dict, dict, self.list, list)): - content_lu[unmap_qname(name)] = value + content_lu[self.unmap_qname(name)] = value else: - # `value` is a list but not a list of lists or list of - # dicts. - ns_name = unmap_qname(name) + # `value` is a list but not a list of lists or list of dicts. 
+ ns_name = self.unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.match(ns_name, self.get('')) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): - content_lu[unmap_qname(name)] = [value] + content_lu[self.unmap_qname(name)] = [value] else: - content_lu[unmap_qname(name)] = value + content_lu[self.unmap_qname(name)] = value break else: if attr_prefix == '' and ns_name not in attributes: @@ -496,9 +471,9 @@ class UnorderedConverter(XMLSchemaConverter): attributes[ns_name] = value break else: - content_lu[unmap_qname(name)] = [value] + content_lu[self.unmap_qname(name)] = [value] else: - content_lu[unmap_qname(name)] = [value] + content_lu[self.unmap_qname(name)] = [value] return ElementData(tag, text, content_lu, attributes) @@ -529,8 +504,8 @@ class ParkerConverter(XMLSchemaConverter): super(XMLSchemaConverter, self).__setattr__(name, value) @property - def lossless(self): - return False + def lossy(self): + return True def element_decode(self, data, xsd_element, level=0): map_qname = self.map_qname @@ -576,18 +551,18 @@ class ParkerConverter(XMLSchemaConverter): if obj == '': obj = None if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): - return ElementData(xsd_element.name, obj, None, self.dict()) + return ElementData(xsd_element.name, obj, None, {}) else: - return ElementData(xsd_element.name, None, obj, self.dict()) + return ElementData(xsd_element.name, None, obj, {}) else: unmap_qname = self.unmap_qname if not obj: - return ElementData(xsd_element.name, None, None, self.dict()) + return ElementData(xsd_element.name, None, None, {}) elif self.preserve_root: try: items = obj[self.map_qname(xsd_element.name)] except KeyError: - return ElementData(xsd_element.name, None, None, self.dict()) + return ElementData(xsd_element.name, None, None, {}) else: items = obj @@ -602,22 +577,20 @@ class 
ParkerConverter(XMLSchemaConverter): content.append((ns_name, item)) else: for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.match(ns_name, self.get('')) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) else: - for item in value: - content.append((ns_name, item)) + content.extend((ns_name, item) for item in value) break else: - for item in value: - content.append((ns_name, item)) + content.extend((ns_name, item) for item in value) except AttributeError: - return ElementData(xsd_element.name, items, None, self.dict()) + return ElementData(xsd_element.name, items, None, {}) else: - return ElementData(xsd_element.name, None, content, self.dict()) + return ElementData(xsd_element.name, None, content, {}) class BadgerFishConverter(XMLSchemaConverter): @@ -633,26 +606,26 @@ class BadgerFishConverter(XMLSchemaConverter): :param list_class: List class to use for decoded data. Default is `list`. """ def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs): - kwargs.update(attr_prefix='@', text_key='$', cdata_prefix='#') + kwargs.update(attr_prefix='@', text_key='$', cdata_prefix='$') super(BadgerFishConverter, self).__init__( namespaces, dict_class or ordered_dict_class, list_class, **kwargs ) def __setattr__(self, name, value): if name == 'text_key' and value != '$' or name == 'attr_prefix' and value != '@' or \ - name == 'cdata_prefix' and value != '#': + name == 'cdata_prefix' and value != '$': raise XMLSchemaValueError('Wrong value %r for the attribute %r of a %r.' 
% (value, name, type(self))) super(XMLSchemaConverter, self).__setattr__(name, value) @property - def lossless(self): - return True + def lossy(self): + return False def element_decode(self, data, xsd_element, level=0): dict_class = self.dict tag = self.map_qname(data.tag) - has_local_root = not len(self) + has_local_root = not self and not self.strip_namespaces result_dict = dict_class([t for t in self.map_attributes(data.attributes)]) if has_local_root: result_dict['@xmlns'] = dict_class() @@ -708,13 +681,13 @@ class BadgerFishConverter(XMLSchemaConverter): def element_encode(self, obj, xsd_element, level=0): map_qname = self.map_qname unmap_qname = self.unmap_qname - unmap_attribute_qname = self._unmap_attribute_qname tag = xsd_element.qualified_name if level == 0 else xsd_element.name - try: - self.update(obj['@xmlns']) - except KeyError: - pass + if not self.strip_namespaces: + try: + self.update(obj['@xmlns']) + except KeyError: + pass try: element_data = obj[map_qname(xsd_element.name)] @@ -726,7 +699,7 @@ class BadgerFishConverter(XMLSchemaConverter): cdata_prefix = self.cdata_prefix text = None content = [] - attributes = self.dict() + attributes = {} for name, value in element_data.items(): if name == '@xmlns': continue @@ -737,8 +710,9 @@ class BadgerFishConverter(XMLSchemaConverter): index = int(name[len(cdata_prefix):]) content.append((index, value)) elif attr_prefix and name.startswith(attr_prefix): - name = name[len(attr_prefix):] - attributes[unmap_attribute_qname(name)] = value + attr_name = name[len(attr_prefix):] + ns_name = self.unmap_qname(attr_name, xsd_element.attributes) + attributes[ns_name] = value elif not isinstance(value, (self.list, list)) or not value: content.append((unmap_qname(name), value)) elif isinstance(value[0], (self.dict, dict, self.list, list)): @@ -748,13 +722,12 @@ class BadgerFishConverter(XMLSchemaConverter): else: ns_name = unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - 
matched_element = xsd_child.match(ns_name, self.get('')) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) else: - for item in value: - content.append((ns_name, item)) + content.extend((ns_name, item) for item in value) break else: if attr_prefix == '' and ns_name not in attributes: @@ -794,8 +767,8 @@ class AbderaConverter(XMLSchemaConverter): super(XMLSchemaConverter, self).__setattr__(name, value) @property - def lossless(self): - return False + def lossy(self): + return True def element_decode(self, data, xsd_element, level=0): if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): @@ -837,13 +810,13 @@ class AbderaConverter(XMLSchemaConverter): if not isinstance(obj, (self.dict, dict)): if obj == []: obj = None - return ElementData(tag, obj, None, self.dict()) + return ElementData(tag, obj, None, {}) else: unmap_qname = self.unmap_qname - unmap_attribute_qname = self._unmap_attribute_qname - attributes = self.dict() + attributes = {} try: - attributes.update([(unmap_attribute_qname(k), v) for k, v in obj['attributes'].items()]) + attributes.update([(self.unmap_qname(k, xsd_element.attributes), v) + for k, v in obj['attributes'].items()]) except KeyError: children = obj else: @@ -869,13 +842,12 @@ class AbderaConverter(XMLSchemaConverter): else: ns_name = unmap_qname(name) for xsd_child in xsd_element.type.content_type.iter_elements(): - matched_element = xsd_child.match(ns_name, self.get('')) + matched_element = xsd_child.match(ns_name, resolve=True) if matched_element is not None: if matched_element.type.is_list(): content.append((ns_name, value)) else: - for item in value: - content.append((ns_name, item)) + content.extend((ns_name, item) for item in value) break else: content.append((ns_name, value)) @@ -907,8 +879,8 @@ class JsonMLConverter(XMLSchemaConverter): super(XMLSchemaConverter, self).__setattr__(name, value) @property - 
def lossless(self): - return True + def lossy(self): + return False @property def losslessly(self): @@ -927,7 +899,7 @@ class JsonMLConverter(XMLSchemaConverter): for name, value, _ in self.map_content(data.content) ]) - if level == 0 and xsd_element.is_global and self: + if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: attributes.update([('xmlns:%s' % k if k else 'xmlns', v) for k, v in self.items()]) if attributes: result_list.insert(1, attributes) @@ -935,7 +907,7 @@ class JsonMLConverter(XMLSchemaConverter): def element_encode(self, obj, xsd_element, level=0): unmap_qname = self.unmap_qname - attributes = self.dict() + attributes = {} if not isinstance(obj, (self.list, list)) or not obj: raise XMLSchemaValueError("Wrong data format, a not empty list required: %r." % obj) @@ -945,7 +917,6 @@ class JsonMLConverter(XMLSchemaConverter): raise XMLSchemaValueError("Unmatched tag") return ElementData(xsd_element.name, None, None, attributes) - unmap_attribute_qname = self._unmap_attribute_qname try: for k, v in obj[1].items(): if k == 'xmlns': @@ -953,7 +924,7 @@ class JsonMLConverter(XMLSchemaConverter): elif k.startswith('xmlns:'): self[k.split('xmlns:')[1]] = v else: - attributes[unmap_attribute_qname(k)] = v + attributes[self.unmap_qname(k, xsd_element.attributes)] = v except AttributeError: content_index = 1 else: diff --git a/xmlschema/documents.py b/xmlschema/documents.py index bc66718..439a8c9 100644 --- a/xmlschema/documents.py +++ b/xmlschema/documents.py @@ -25,12 +25,16 @@ def get_context(source, schema=None, cls=None, locations=None, base_url=None, if cls is None: cls = XMLSchema - if schema is None: + try: schema, locations = fetch_schema_locations(source, locations, base_url=base_url) + except ValueError: + if schema is None: + raise + elif not isinstance(schema, XMLSchemaBase): + schema = cls(schema, validation='strict', locations=locations, base_url=base_url, + defuse=defuse, timeout=timeout) + else: schema = 
cls(schema, validation='strict', locations=locations, defuse=defuse, timeout=timeout) - elif not isinstance(schema, XMLSchemaBase): - schema = cls(schema, validation='strict', locations=locations, base_url=base_url, - defuse=defuse, timeout=timeout) if not isinstance(source, XMLResource): source = XMLResource(source, defuse=defuse, timeout=timeout, lazy=lazy) diff --git a/xmlschema/etree.py b/xmlschema/etree.py index 6bd80cc..7c4d28f 100644 --- a/xmlschema/etree.py +++ b/xmlschema/etree.py @@ -13,8 +13,8 @@ This module contains ElementTree setup and helpers for xmlschema package. """ from __future__ import unicode_literals import sys -import re import importlib +import re from collections import Counter try: @@ -23,10 +23,9 @@ except ImportError: lxml_etree = None from .compat import PY3 -from .exceptions import XMLSchemaValueError, XMLSchemaTypeError -from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE -from .helpers import get_namespace, get_qname, qname_to_prefixed -from .xpath import ElementPathMixin +from .exceptions import XMLSchemaTypeError +from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE, get_namespace +from .qnames import get_qname, qname_to_prefixed ### # Programmatic import of xml.etree.ElementTree @@ -130,11 +129,6 @@ class SafeXMLParser(PyElementTree.XMLParser): ) -def is_etree_element(elem): - """More safer test for matching ElementTree elements.""" - return hasattr(elem, 'tag') and hasattr(elem, 'attrib') and not isinstance(elem, ElementPathMixin) - - def etree_tostring(elem, namespaces=None, indent='', max_lines=None, spaces_for_tab=4, xml_declaration=False): """ Serialize an Element tree to a string. Tab characters are replaced by whitespaces. 
@@ -159,19 +153,21 @@ def etree_tostring(elem, namespaces=None, indent='', max_lines=None, spaces_for_ if isinstance(elem, etree_element): if namespaces: for prefix, uri in namespaces.items(): - etree_register_namespace(prefix, uri) + if not re.match(r'ns\d+$', prefix): + etree_register_namespace(prefix, uri) tostring = ElementTree.tostring elif isinstance(elem, py_etree_element): if namespaces: for prefix, uri in namespaces.items(): - PyElementTree.register_namespace(prefix, uri) + if not re.match(r'ns\d+$', prefix): + PyElementTree.register_namespace(prefix, uri) tostring = PyElementTree.tostring elif lxml_etree is not None: if namespaces: for prefix, uri in namespaces.items(): - if prefix: + if prefix and not re.match(r'ns\d+$', prefix): lxml_etree_register_namespace(prefix, uri) tostring = lxml_etree.tostring else: @@ -267,21 +263,6 @@ def etree_getpath(elem, root, namespaces=None, relative=True, add_position=False return path -def etree_last_child(elem): - """Returns the last child of the element, ignoring children that are lxml comments.""" - for child in reversed(elem): - if not callable(child.tag): - return child - - -def etree_child_index(elem, child): - """Return the index or raise ValueError if it is not a *child* of *elem*.""" - for index in range(len(elem)): - if elem[index] is child: - return index - raise XMLSchemaValueError("%r is not a child of %r" % (child, elem)) - - def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True): """ Tests the equality of two XML Element trees. @@ -316,7 +297,7 @@ def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True): if strict: raise AssertionError("%r != %r: attribute differ: %r != %r." % (e1, e2, e1.attrib, e2.attrib)) else: - assert e1.attrib.keys() == e2.attrib.keys(), \ + assert sorted(e1.attrib.keys()) == sorted(e2.attrib.keys()), \ "%r != %r: attribute keys differ: %r != %r." 
% (e1, e2, e1.attrib.keys(), e2.attrib.keys()) for k in e1.attrib: a1, a2 = e1.attrib[k].strip(), e2.attrib[k].strip() @@ -370,3 +351,27 @@ def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True): pass else: assert False, "First tree ends before the second: %r." % e2 + + +def prune_etree(root, selector): + """ + Removes from an tree structure the elements that verify the selector + function. The checking and eventual removals are performed using a + breadth-first visit method. + + :param root: the root element of the tree. + :param selector: the single argument function to apply on each visited node. + :return: `True` if the root node verify the selector function, `None` otherwise. + """ + def _prune_subtree(elem): + for child in elem[:]: + if selector(child): + elem.remove(child) + + for child in elem: + _prune_subtree(child) + + if selector(root): + del root[:] + return True + _prune_subtree(root) diff --git a/xmlschema/exceptions.py b/xmlschema/exceptions.py index 964bca9..53dd563 100644 --- a/xmlschema/exceptions.py +++ b/xmlschema/exceptions.py @@ -54,5 +54,9 @@ class XMLSchemaRegexError(XMLSchemaException, ValueError): """Raised when an error is found when parsing an XML Schema regular expression.""" +class XMLSchemaNamespaceError(XMLSchemaException, RuntimeError): + """Raised when a wrong runtime condition is found with a namespace.""" + + class XMLSchemaWarning(Warning): """Base warning class for the XMLSchema package.""" diff --git a/xmlschema/helpers.py b/xmlschema/helpers.py index 03c6785..8a77e86 100644 --- a/xmlschema/helpers.py +++ b/xmlschema/helpers.py @@ -11,80 +11,19 @@ """ This module contains various helper functions and classes. 
""" -import re +from decimal import Decimal -from .exceptions import XMLSchemaValueError, XMLSchemaTypeError, XMLSchemaKeyError +from .compat import string_base_type +from .exceptions import XMLSchemaValueError from .qnames import XSD_ANNOTATION +from .xpath import ElementPathMixin XSD_FINAL_ATTRIBUTE_VALUES = {'restriction', 'extension', 'list', 'union'} -NAMESPACE_PATTERN = re.compile(r'{([^}]*)}') -def get_namespace(name): - try: - return NAMESPACE_PATTERN.match(name).group(1) - except (AttributeError, TypeError): - return '' - - -def get_qname(uri, name): - """ - Returns an expanded QName from URI and local part. If any argument has boolean value - `False` or if the name is already an expanded QName, returns the *name* argument. - - :param uri: namespace URI - :param name: local or qualified name - :return: string or the name argument - """ - if not uri or not name or name[0] in ('{', '.', '/', '['): - return name - else: - return '{%s}%s' % (uri, name) - - -def local_name(qname): - """ - Return the local part of an expanded QName. If the name is `None` or empty - returns the *name* argument. - - :param qname: an expanded QName or a local name. - """ - try: - if qname[0] != '{': - return qname - return qname[qname.rindex('}') + 1:] - except IndexError: - return '' - except ValueError: - raise XMLSchemaValueError("wrong format for a universal name! %r" % qname) - except TypeError: - if qname is None: - return qname - raise XMLSchemaTypeError("required a string-like object or None! %r" % qname) - - -def qname_to_prefixed(qname, namespaces): - """ - Transforms a fully qualified name into a prefixed name using a namespace map. Returns the - *qname* argument if it's not a fully qualified name or if it has boolean value `False`. - - :param qname: a fully qualified name or a local name. - :param namespaces: a map from prefixes to namespace URIs. - :return: string with a prefixed or local reference. 
- """ - if not qname: - return qname - - namespace = get_namespace(qname) - for prefix, uri in sorted(filter(lambda x: x[1] == namespace, namespaces.items()), reverse=True): - if not uri: - return '%s:%s' % (prefix, qname) if prefix else qname - elif prefix: - return qname.replace('{%s}' % uri, '%s:' % prefix) - else: - return qname.replace('{%s}' % uri, '') - else: - return qname +def is_etree_element(elem): + """More safer test for matching ElementTree elements.""" + return hasattr(elem, 'tag') and hasattr(elem, 'attrib') and not isinstance(elem, ElementPathMixin) def get_xsd_annotation(elem): @@ -101,83 +40,6 @@ def get_xsd_annotation(elem): return -def iter_xsd_components(elem, start=0): - """ - Returns an iterator for XSD child components, excluding the annotation. - - :param elem: the parent Element. - :param start: the start child component to yield, the optional annotation is not counted. \ - With the default value 0 starts from the first component. - """ - counter = 0 - for child in elem: - if child.tag == XSD_ANNOTATION: - if counter > 0: - raise XMLSchemaValueError("XSD annotation not allowed after the first position.") - else: - if start > 0: - start -= 1 - else: - yield child - counter += 1 - - -def has_xsd_components(elem, start=0): - try: - next(iter_xsd_components(elem, start)) - except StopIteration: - return False - else: - return True - - -def get_xsd_component(elem, required=True, strict=True): - """ - Returns the first XSD component child, excluding the annotation. - - :param elem: the parent Element. - :param required: if `True`, that is the default, raises a *ValueError* if there \ - is not any component; with `False` in those cases `None` is returned. - :param strict: raises a *ValueError* if there is more than one component. 
- """ - components_iterator = iter_xsd_components(elem) - try: - xsd_component = next(components_iterator) - except StopIteration: - if required: - raise XMLSchemaValueError("missing XSD component") - return None - else: - if not strict: - return xsd_component - try: - next(components_iterator) - except StopIteration: - return xsd_component - else: - raise XMLSchemaValueError("too many XSD components") - - -def get_xml_bool_attribute(elem, attribute, default=None): - """ - Get an XML boolean attribute. - - :param elem: the Element instance. - :param attribute: the attribute name. - :param default: default value, accepted values are `True` or `False`. - :return: `True` or `False`. - """ - value = elem.get(attribute, default) - if value is None: - raise XMLSchemaKeyError(attribute) - elif value in ('true', '1') or value is True: - return True - elif value in ('false', '0') or value is False: - return False - else: - raise XMLSchemaTypeError("an XML boolean value is required for attribute %r" % attribute) - - def get_xsd_derivation_attribute(elem, attribute, values=None): """ Get a derivation attribute (maybe 'block', 'blockDefault', 'final' or 'finalDefault') @@ -198,7 +60,7 @@ def get_xsd_derivation_attribute(elem, attribute, values=None): items = value.split() if len(items) == 1 and items[0] == '#all': return ' '.join(values) - elif not all([s in values for s in items]): + elif not all(s in values for s in items): raise XMLSchemaValueError("wrong value %r for attribute %r." % (value, attribute)) return value @@ -221,6 +83,44 @@ def get_xsd_form_attribute(elem, attribute): return value +def count_digits(number): + """ + Counts the digits of a number. + + :param number: an int or a float or a Decimal or a string representing a number. + :return: a couple with the number of digits of the integer part and \ + the number of digits of the decimal part. 
+ """ + if isinstance(number, string_base_type): + number = str(Decimal(number)).lstrip('-+') + else: + number = str(number).lstrip('-+') + + if 'E' in number: + significand, _, exponent = number.partition('E') + elif 'e' in number: + significand, _, exponent = number.partition('e') + elif '.' not in number: + return len(number.lstrip('0')), 0 + else: + integer_part, _, decimal_part = number.partition('.') + return len(integer_part.lstrip('0')), len(decimal_part.rstrip('0')) + + significand = significand.strip('0') + exponent = int(exponent) + + num_digits = len(significand) - 1 if '.' in significand else len(significand) + if exponent > 0: + return num_digits + exponent, 0 + else: + return 0, num_digits - exponent - 1 + + +def strictly_equal(obj1, obj2): + """Checks if the objects are equal and are of the same type.""" + return obj1 == obj2 and type(obj1) is type(obj2) + + class ParticleCounter(object): """ An helper class for counting total min/max occurrences of XSD particles. diff --git a/xmlschema/namespaces.py b/xmlschema/namespaces.py index 5f970be..beff6c6 100644 --- a/xmlschema/namespaces.py +++ b/xmlschema/namespaces.py @@ -12,9 +12,9 @@ This module contains namespace definitions for W3C core standards and namespace related classes. 
""" from __future__ import unicode_literals +import re from .compat import MutableMapping, Mapping -from .helpers import get_namespace XSD_NAMESPACE = 'http://www.w3.org/2001/XMLSchema' "URI of the XML Schema Definition namespace (xs|xsd)" @@ -26,7 +26,7 @@ XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' "URI of the XML namespace (xml)" XHTML_NAMESPACE = 'http://www.w3.org/1999/xhtml' -XHTML_DATATYPES_NAMESPACE = "http://www.w3.org/1999/xhtml/datatypes/" +XHTML_DATATYPES_NAMESPACE = 'http://www.w3.org/1999/xhtml/datatypes/' "URIs of the Extensible Hypertext Markup Language namespace (html)" XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink' @@ -38,10 +38,20 @@ XSLT_NAMESPACE = "http://www.w3.org/1999/XSL/Transform" HFP_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-hasFacetAndProperty' "URI of the XML Schema has Facet and Property namespace (hfp)" -VC_NAMESPACE = "http://www.w3.org/2007/XMLSchema-versioning" +VC_NAMESPACE = 'http://www.w3.org/2007/XMLSchema-versioning' "URI of the XML Schema Versioning namespace (vc)" +NAMESPACE_PATTERN = re.compile(r'{([^}]*)}') + + +def get_namespace(name): + try: + return NAMESPACE_PATTERN.match(name).group(1) + except (AttributeError, TypeError): + return '' + + class NamespaceResourcesMap(MutableMapping): """ Dictionary for storing information about namespace resources. The values are @@ -82,7 +92,7 @@ class NamespaceResourcesMap(MutableMapping): class NamespaceMapper(MutableMapping): """ - A class to map/unmap namespace prefixes to URIs. + A class to map/unmap namespace prefixes to URIs. The :param namespaces: Initial data with namespace prefixes and URIs. """ @@ -119,6 +129,13 @@ class NamespaceMapper(MutableMapping): self._namespaces.clear() def map_qname(self, qname): + """ + Converts an extended QName to the prefixed format. Only registered + namespaces are mapped. + + :param qname: a QName in extended format or a local name. + :return: a QName in prefixed format or a local name. 
+ """ try: if qname[0] != '{' or not self._namespaces: return qname @@ -139,7 +156,17 @@ class NamespaceMapper(MutableMapping): else: return qname - def unmap_qname(self, qname): + def unmap_qname(self, qname, name_table=None): + """ + Converts a QName in prefixed format or a local name to the extended QName format. + Local names are converted only if a default namespace is included in the instance. + If a *name_table* is provided a local name is mapped to the default namespace + only if not found in the name table. + + :param qname: a QName in prefixed format or a local name + :param name_table: an optional lookup table for checking local names. + :return: a QName in extended format or a local name. + """ try: if qname[0] == '{' or not self: return qname @@ -149,8 +176,10 @@ class NamespaceMapper(MutableMapping): try: prefix, name = qname.split(':', 1) except ValueError: - if self.get(''): - return u'{%s}%s' % (self.get(''), qname) + if not self._namespaces.get(''): + return qname + elif name_table is None or qname not in name_table: + return '{%s}%s' % (self._namespaces.get(''), qname) else: return qname else: diff --git a/xmlschema/qnames.py b/xmlschema/qnames.py index 775722d..eb4f27d 100644 --- a/xmlschema/qnames.py +++ b/xmlschema/qnames.py @@ -9,192 +9,271 @@ # @author Davide Brunato # """ -This module contains qualified names constants. +This module contains qualified names constants and helpers. 
""" from __future__ import unicode_literals +from .exceptions import XMLSchemaTypeError, XMLSchemaValueError +from .namespaces import get_namespace + +VC_TEMPLATE = '{http://www.w3.org/2007/XMLSchema-versioning}%s' +XML_TEMPLATE = '{http://www.w3.org/XML/1998/namespace}%s' +XSD_TEMPLATE = '{http://www.w3.org/2001/XMLSchema}%s' +XSI_TEMPLATE = '{http://www.w3.org/2001/XMLSchema-instance}%s' -def xsd_qname(name): - return '{http://www.w3.org/2001/XMLSchema}%s' % name - - -def xml_qname(name): - return '{http://www.w3.org/XML/1998/namespace}%s' % name - - -def xsi_qname(name): - return '{http://www.w3.org/2001/XMLSchema-instance}%s' % name +# +# Version Control attributes (XSD 1.1) +VC_MIN_VERSION = VC_TEMPLATE % 'minVersion' +VC_MAX_VERSION = VC_TEMPLATE % 'maxVersion' +VC_TYPE_AVAILABLE = VC_TEMPLATE % 'typeAvailable' +VC_TYPE_UNAVAILABLE = VC_TEMPLATE % 'typeUnavailable' +VC_FACET_AVAILABLE = VC_TEMPLATE % 'facetAvailable' +VC_FACET_UNAVAILABLE = VC_TEMPLATE % 'facetUnavailable' # # XML attributes -XML_LANG = xml_qname('lang') -XML_SPACE = xml_qname('space') -XML_BASE = xml_qname('base') -XML_ID = xml_qname('id') -XML_SPECIAL_ATTRS = xml_qname('specialAttrs') +XML_LANG = XML_TEMPLATE % 'lang' +XML_SPACE = XML_TEMPLATE % 'space' +XML_BASE = XML_TEMPLATE % 'base' +XML_ID = XML_TEMPLATE % 'id' +XML_SPECIAL_ATTRS = XML_TEMPLATE % 'specialAttrs' + # # XML Schema Instance attributes -XSI_NIL = xsi_qname('nil') -XSI_TYPE = xsi_qname('type') -XSI_SCHEMA_LOCATION = xsi_qname('schemaLocation') -XSI_NONS_SCHEMA_LOCATION = xsi_qname('noNamespaceSchemaLocation') +XSI_NIL = XSI_TEMPLATE % 'nil' +XSI_TYPE = XSI_TEMPLATE % 'type' +XSI_SCHEMA_LOCATION = XSI_TEMPLATE % 'schemaLocation' +XSI_NONS_SCHEMA_LOCATION = XSI_TEMPLATE % 'noNamespaceSchemaLocation' # # XML Schema fully qualified names -XSD_SCHEMA = xsd_qname('schema') +XSD_SCHEMA = XSD_TEMPLATE % 'schema' # Annotations -XSD_ANNOTATION = xsd_qname('annotation') -XSD_APPINFO = xsd_qname('appinfo') -XSD_DOCUMENTATION = 
xsd_qname('documentation') +XSD_ANNOTATION = XSD_TEMPLATE % 'annotation' +XSD_APPINFO = XSD_TEMPLATE % 'appinfo' +XSD_DOCUMENTATION = XSD_TEMPLATE % 'documentation' # Composing schemas -XSD_INCLUDE = xsd_qname('include') -XSD_IMPORT = xsd_qname('import') -XSD_REDEFINE = xsd_qname('redefine') -XSD_OVERRIDE = xsd_qname('override') +XSD_INCLUDE = XSD_TEMPLATE % 'include' +XSD_IMPORT = XSD_TEMPLATE % 'import' +XSD_REDEFINE = XSD_TEMPLATE % 'redefine' +XSD_OVERRIDE = XSD_TEMPLATE % 'override' # Structures -XSD_SIMPLE_TYPE = xsd_qname('simpleType') -XSD_COMPLEX_TYPE = xsd_qname('complexType') -XSD_ATTRIBUTE = xsd_qname('attribute') -XSD_ELEMENT = xsd_qname('element') -XSD_NOTATION = xsd_qname('notation') +XSD_SIMPLE_TYPE = XSD_TEMPLATE % 'simpleType' +XSD_COMPLEX_TYPE = XSD_TEMPLATE % 'complexType' +XSD_ATTRIBUTE = XSD_TEMPLATE % 'attribute' +XSD_ELEMENT = XSD_TEMPLATE % 'element' +XSD_NOTATION = XSD_TEMPLATE % 'notation' # Grouping -XSD_GROUP = xsd_qname('group') -XSD_ATTRIBUTE_GROUP = xsd_qname('attributeGroup') +XSD_GROUP = XSD_TEMPLATE % 'group' +XSD_ATTRIBUTE_GROUP = XSD_TEMPLATE % 'attributeGroup' # simpleType declaration elements -XSD_RESTRICTION = xsd_qname('restriction') -XSD_LIST = xsd_qname('list') -XSD_UNION = xsd_qname('union') +XSD_RESTRICTION = XSD_TEMPLATE % 'restriction' +XSD_LIST = XSD_TEMPLATE % 'list' +XSD_UNION = XSD_TEMPLATE % 'union' # complexType content -XSD_EXTENSION = xsd_qname('extension') -XSD_SEQUENCE = xsd_qname('sequence') -XSD_CHOICE = xsd_qname('choice') -XSD_ALL = xsd_qname('all') -XSD_ANY = xsd_qname('any') -XSD_SIMPLE_CONTENT = xsd_qname('simpleContent') -XSD_COMPLEX_CONTENT = xsd_qname('complexContent') -XSD_ANY_ATTRIBUTE = xsd_qname('anyAttribute') +XSD_EXTENSION = XSD_TEMPLATE % 'extension' +XSD_SEQUENCE = XSD_TEMPLATE % 'sequence' +XSD_CHOICE = XSD_TEMPLATE % 'choice' +XSD_ALL = XSD_TEMPLATE % 'all' +XSD_ANY = XSD_TEMPLATE % 'any' +XSD_SIMPLE_CONTENT = XSD_TEMPLATE % 'simpleContent' +XSD_COMPLEX_CONTENT = XSD_TEMPLATE % 
'complexContent' +XSD_ANY_ATTRIBUTE = XSD_TEMPLATE % 'anyAttribute' # # Facets (lexical, pre-lexical and value-based facets) -XSD_ENUMERATION = xsd_qname('enumeration') -XSD_LENGTH = xsd_qname('length') -XSD_MIN_LENGTH = xsd_qname('minLength') -XSD_MAX_LENGTH = xsd_qname('maxLength') -XSD_PATTERN = xsd_qname('pattern') # lexical facet -XSD_WHITE_SPACE = xsd_qname('whiteSpace') # pre-lexical facet -XSD_MAX_INCLUSIVE = xsd_qname('maxInclusive') -XSD_MAX_EXCLUSIVE = xsd_qname('maxExclusive') -XSD_MIN_INCLUSIVE = xsd_qname('minInclusive') -XSD_MIN_EXCLUSIVE = xsd_qname('minExclusive') -XSD_TOTAL_DIGITS = xsd_qname('totalDigits') -XSD_FRACTION_DIGITS = xsd_qname('fractionDigits') +XSD_ENUMERATION = XSD_TEMPLATE % 'enumeration' +XSD_LENGTH = XSD_TEMPLATE % 'length' +XSD_MIN_LENGTH = XSD_TEMPLATE % 'minLength' +XSD_MAX_LENGTH = XSD_TEMPLATE % 'maxLength' +XSD_PATTERN = XSD_TEMPLATE % 'pattern' # lexical facet +XSD_WHITE_SPACE = XSD_TEMPLATE % 'whiteSpace' # pre-lexical facet +XSD_MAX_INCLUSIVE = XSD_TEMPLATE % 'maxInclusive' +XSD_MAX_EXCLUSIVE = XSD_TEMPLATE % 'maxExclusive' +XSD_MIN_INCLUSIVE = XSD_TEMPLATE % 'minInclusive' +XSD_MIN_EXCLUSIVE = XSD_TEMPLATE % 'minExclusive' +XSD_TOTAL_DIGITS = XSD_TEMPLATE % 'totalDigits' +XSD_FRACTION_DIGITS = XSD_TEMPLATE % 'fractionDigits' # XSD 1.1 elements -XSD_OPEN_CONTENT = xsd_qname('openContent') # open content model -XSD_DEFAULT_OPEN_CONTENT = xsd_qname('defaultOpenContent') # default open content model (schema level) -XSD_ALTERNATIVE = xsd_qname('alternative') # conditional type assignment -XSD_ASSERT = xsd_qname('assert') # complex type assertions -XSD_ASSERTION = xsd_qname('assertion') # facets -XSD_EXPLICIT_TIMEZONE = xsd_qname('explicitTimezone') +XSD_OPEN_CONTENT = XSD_TEMPLATE % 'openContent' # open content model +XSD_DEFAULT_OPEN_CONTENT = XSD_TEMPLATE % 'defaultOpenContent' # default open content model (schema level) +XSD_ALTERNATIVE = XSD_TEMPLATE % 'alternative' # conditional type assignment +XSD_ASSERT = 
XSD_TEMPLATE % 'assert' # complex type assertions +XSD_ASSERTION = XSD_TEMPLATE % 'assertion' # facets +XSD_EXPLICIT_TIMEZONE = XSD_TEMPLATE % 'explicitTimezone' # Identity constraints -XSD_UNIQUE = xsd_qname('unique') -XSD_KEY = xsd_qname('key') -XSD_KEYREF = xsd_qname('keyref') -XSD_SELECTOR = xsd_qname('selector') -XSD_FIELD = xsd_qname('field') +XSD_UNIQUE = XSD_TEMPLATE % 'unique' +XSD_KEY = XSD_TEMPLATE % 'key' +XSD_KEYREF = XSD_TEMPLATE % 'keyref' +XSD_SELECTOR = XSD_TEMPLATE % 'selector' +XSD_FIELD = XSD_TEMPLATE % 'field' # # XSD Builtin Types # Special XSD built-in types. -XSD_ANY_TYPE = xsd_qname('anyType') -XSD_ANY_SIMPLE_TYPE = xsd_qname('anySimpleType') -XSD_ANY_ATOMIC_TYPE = xsd_qname('anyAtomicType') +XSD_ANY_TYPE = XSD_TEMPLATE % 'anyType' +XSD_ANY_SIMPLE_TYPE = XSD_TEMPLATE % 'anySimpleType' +XSD_ANY_ATOMIC_TYPE = XSD_TEMPLATE % 'anyAtomicType' # Other XSD built-in types. -XSD_DECIMAL = xsd_qname('decimal') -XSD_STRING = xsd_qname('string') -XSD_DOUBLE = xsd_qname('double') -XSD_FLOAT = xsd_qname('float') +XSD_DECIMAL = XSD_TEMPLATE % 'decimal' +XSD_STRING = XSD_TEMPLATE % 'string' +XSD_DOUBLE = XSD_TEMPLATE % 'double' +XSD_FLOAT = XSD_TEMPLATE % 'float' -XSD_DATE = xsd_qname('date') -XSD_DATETIME = xsd_qname('dateTime') -XSD_GDAY = xsd_qname('gDay') -XSD_GMONTH = xsd_qname('gMonth') -XSD_GMONTH_DAY = xsd_qname('gMonthDay') -XSD_GYEAR = xsd_qname('gYear') -XSD_GYEAR_MONTH = xsd_qname('gYearMonth') -XSD_TIME = xsd_qname('time') -XSD_DURATION = xsd_qname('duration') +XSD_DATE = XSD_TEMPLATE % 'date' +XSD_DATETIME = XSD_TEMPLATE % 'dateTime' +XSD_GDAY = XSD_TEMPLATE % 'gDay' +XSD_GMONTH = XSD_TEMPLATE % 'gMonth' +XSD_GMONTH_DAY = XSD_TEMPLATE % 'gMonthDay' +XSD_GYEAR = XSD_TEMPLATE % 'gYear' +XSD_GYEAR_MONTH = XSD_TEMPLATE % 'gYearMonth' +XSD_TIME = XSD_TEMPLATE % 'time' +XSD_DURATION = XSD_TEMPLATE % 'duration' -XSD_QNAME = xsd_qname('QName') -XSD_NOTATION_TYPE = xsd_qname('NOTATION') -XSD_ANY_URI = xsd_qname('anyURI') -XSD_BOOLEAN = 
xsd_qname('boolean') -XSD_BASE64_BINARY = xsd_qname('base64Binary') -XSD_HEX_BINARY = xsd_qname('hexBinary') -XSD_NORMALIZED_STRING = xsd_qname('normalizedString') -XSD_TOKEN = xsd_qname('token') -XSD_LANGUAGE = xsd_qname('language') -XSD_NAME = xsd_qname('Name') -XSD_NCNAME = xsd_qname('NCName') -XSD_ID = xsd_qname('ID') -XSD_IDREF = xsd_qname('IDREF') -XSD_ENTITY = xsd_qname('ENTITY') -XSD_NMTOKEN = xsd_qname('NMTOKEN') +XSD_QNAME = XSD_TEMPLATE % 'QName' +XSD_NOTATION_TYPE = XSD_TEMPLATE % 'NOTATION' +XSD_ANY_URI = XSD_TEMPLATE % 'anyURI' +XSD_BOOLEAN = XSD_TEMPLATE % 'boolean' +XSD_BASE64_BINARY = XSD_TEMPLATE % 'base64Binary' +XSD_HEX_BINARY = XSD_TEMPLATE % 'hexBinary' +XSD_NORMALIZED_STRING = XSD_TEMPLATE % 'normalizedString' +XSD_TOKEN = XSD_TEMPLATE % 'token' +XSD_LANGUAGE = XSD_TEMPLATE % 'language' +XSD_NAME = XSD_TEMPLATE % 'Name' +XSD_NCNAME = XSD_TEMPLATE % 'NCName' +XSD_ID = XSD_TEMPLATE % 'ID' +XSD_IDREF = XSD_TEMPLATE % 'IDREF' +XSD_ENTITY = XSD_TEMPLATE % 'ENTITY' +XSD_NMTOKEN = XSD_TEMPLATE % 'NMTOKEN' -XSD_INTEGER = xsd_qname('integer') -XSD_LONG = xsd_qname('long') -XSD_INT = xsd_qname('int') -XSD_SHORT = xsd_qname('short') -XSD_BYTE = xsd_qname('byte') -XSD_NON_NEGATIVE_INTEGER = xsd_qname('nonNegativeInteger') -XSD_POSITIVE_INTEGER = xsd_qname('positiveInteger') -XSD_UNSIGNED_LONG = xsd_qname('unsignedLong') -XSD_UNSIGNED_INT = xsd_qname('unsignedInt') -XSD_UNSIGNED_SHORT = xsd_qname('unsignedShort') -XSD_UNSIGNED_BYTE = xsd_qname('unsignedByte') -XSD_NON_POSITIVE_INTEGER = xsd_qname('nonPositiveInteger') -XSD_NEGATIVE_INTEGER = xsd_qname('negativeInteger') +XSD_INTEGER = XSD_TEMPLATE % 'integer' +XSD_LONG = XSD_TEMPLATE % 'long' +XSD_INT = XSD_TEMPLATE % 'int' +XSD_SHORT = XSD_TEMPLATE % 'short' +XSD_BYTE = XSD_TEMPLATE % 'byte' +XSD_NON_NEGATIVE_INTEGER = XSD_TEMPLATE % 'nonNegativeInteger' +XSD_POSITIVE_INTEGER = XSD_TEMPLATE % 'positiveInteger' +XSD_UNSIGNED_LONG = XSD_TEMPLATE % 'unsignedLong' +XSD_UNSIGNED_INT = XSD_TEMPLATE % 
'unsignedInt' +XSD_UNSIGNED_SHORT = XSD_TEMPLATE % 'unsignedShort' +XSD_UNSIGNED_BYTE = XSD_TEMPLATE % 'unsignedByte' +XSD_NON_POSITIVE_INTEGER = XSD_TEMPLATE % 'nonPositiveInteger' +XSD_NEGATIVE_INTEGER = XSD_TEMPLATE % 'negativeInteger' # Built-in list types -XSD_IDREFS = xsd_qname('IDREFS') -XSD_ENTITIES = xsd_qname('ENTITIES') -XSD_NMTOKENS = xsd_qname('NMTOKENS') +XSD_IDREFS = XSD_TEMPLATE % 'IDREFS' +XSD_ENTITIES = XSD_TEMPLATE % 'ENTITIES' +XSD_NMTOKENS = XSD_TEMPLATE % 'NMTOKENS' # XSD 1.1 built-in types -XSD_DATE_TIME_STAMP = xsd_qname('dateTimeStamp') -XSD_DAY_TIME_DURATION = xsd_qname('dayTimeDuration') -XSD_YEAR_MONTH_DURATION = xsd_qname('yearMonthDuration') +XSD_DATE_TIME_STAMP = XSD_TEMPLATE % 'dateTimeStamp' +XSD_DAY_TIME_DURATION = XSD_TEMPLATE % 'dayTimeDuration' +XSD_YEAR_MONTH_DURATION = XSD_TEMPLATE % 'yearMonthDuration' +XSD_ERROR = XSD_TEMPLATE % 'error' -__all__ = [ - 'XML_LANG', 'XML_ID', 'XML_BASE', 'XML_SPACE', 'XML_SPECIAL_ATTRS', 'XSI_TYPE', 'XSI_NIL', - 'XSI_SCHEMA_LOCATION', 'XSI_NONS_SCHEMA_LOCATION', 'XSD_SCHEMA', 'XSD_ANNOTATION', 'XSD_APPINFO', - 'XSD_DOCUMENTATION', 'XSD_INCLUDE', 'XSD_IMPORT', 'XSD_REDEFINE', 'XSD_SIMPLE_TYPE', 'XSD_COMPLEX_TYPE', - 'XSD_ATTRIBUTE', 'XSD_ELEMENT', 'XSD_NOTATION', 'XSD_GROUP', 'XSD_ATTRIBUTE_GROUP', 'XSD_RESTRICTION', - 'XSD_LIST', 'XSD_UNION', 'XSD_EXTENSION', 'XSD_SEQUENCE', 'XSD_CHOICE', 'XSD_ALL', 'XSD_ANY', - 'XSD_SIMPLE_CONTENT', 'XSD_COMPLEX_CONTENT', 'XSD_ANY_ATTRIBUTE', 'XSD_ENUMERATION', 'XSD_LENGTH', - 'XSD_MIN_LENGTH', 'XSD_MAX_LENGTH', 'XSD_PATTERN', 'XSD_WHITE_SPACE', 'XSD_MAX_INCLUSIVE', - 'XSD_MAX_EXCLUSIVE', 'XSD_MIN_INCLUSIVE', 'XSD_MIN_EXCLUSIVE', 'XSD_TOTAL_DIGITS', 'XSD_FRACTION_DIGITS', - 'XSD_OPEN_CONTENT', 'XSD_ALTERNATIVE', 'XSD_ASSERT', 'XSD_ASSERTION', 'XSD_EXPLICIT_TIMEZONE', - 'XSD_UNIQUE', 'XSD_KEY', 'XSD_KEYREF', 'XSD_SELECTOR', 'XSD_FIELD', 'XSD_ANY_TYPE', 'XSD_ANY_SIMPLE_TYPE', - 'XSD_ANY_ATOMIC_TYPE', 'XSD_DECIMAL', 'XSD_STRING', 'XSD_DOUBLE', 'XSD_FLOAT', 
'XSD_DATE', 'XSD_DATETIME', - 'XSD_GDAY', 'XSD_GMONTH', 'XSD_GMONTH_DAY', 'XSD_GYEAR', 'XSD_GYEAR_MONTH', 'XSD_TIME', 'XSD_DURATION', - 'XSD_QNAME', 'XSD_NOTATION_TYPE', 'XSD_ANY_URI', 'XSD_BOOLEAN', 'XSD_BASE64_BINARY', 'XSD_HEX_BINARY', - 'XSD_NORMALIZED_STRING', 'XSD_TOKEN', 'XSD_LANGUAGE', 'XSD_NAME', 'XSD_NCNAME', 'XSD_ID', 'XSD_IDREF', - 'XSD_ENTITY', 'XSD_NMTOKEN', 'XSD_INTEGER', 'XSD_LONG', 'XSD_INT', 'XSD_SHORT', 'XSD_BYTE', - 'XSD_NON_NEGATIVE_INTEGER', 'XSD_POSITIVE_INTEGER', 'XSD_UNSIGNED_LONG', 'XSD_UNSIGNED_INT', - 'XSD_UNSIGNED_SHORT', 'XSD_UNSIGNED_BYTE', 'XSD_NON_POSITIVE_INTEGER', 'XSD_NEGATIVE_INTEGER', - 'XSD_IDREFS', 'XSD_ENTITIES', 'XSD_NMTOKENS', 'XSD_DATE_TIME_STAMP', 'XSD_DAY_TIME_DURATION', - 'XSD_YEAR_MONTH_DURATION', 'XSD_DEFAULT_OPEN_CONTENT', 'XSD_OVERRIDE', -] + +def get_qname(uri, name): + """ + Returns an expanded QName from URI and local part. If any argument has boolean value + `False` or if the name is already an expanded QName, returns the *name* argument. + + :param uri: namespace URI + :param name: local or qualified name + :return: string or the name argument + """ + if not uri or not name or name[0] in ('{', '.', '/', '['): + return name + else: + return '{%s}%s' % (uri, name) + + +def local_name(qname): + """ + Return the local part of an expanded QName or a prefixed name. If the name + is `None` or empty returns the *name* argument. + + :param qname: an expanded QName or a prefixed name or a local name. 
+ """ + try: + if qname[0] == '{': + _, qname = qname.split('}') + elif ':' in qname: + _, qname = qname.split(':') + except IndexError: + return '' + except ValueError: + raise XMLSchemaValueError("the argument 'qname' has a wrong format: %r" % qname) + except TypeError: + if qname is None: + return qname + raise XMLSchemaTypeError("the argument 'qname' must be a string-like object or None") + else: + return qname + + +def qname_to_prefixed(qname, namespaces): + """ + Transforms a fully qualified name into a prefixed name using a namespace map. + Returns the *qname* argument if it's not a fully qualified name or if it has + boolean value `False`. + + :param qname: an extended QName or a local name. + :param namespaces: a map from prefixes to namespace URIs. + :return: a QName in prefixed format or a local name. + """ + if not qname: + return qname + + namespace = get_namespace(qname) + for prefix, uri in sorted(filter(lambda x: x[1] == namespace, namespaces.items()), reverse=True): + if not uri: + return '%s:%s' % (prefix, qname) if prefix else qname + elif prefix: + return qname.replace('{%s}' % uri, '%s:' % prefix) + else: + return qname.replace('{%s}' % uri, '') + else: + return qname + + +def qname_to_extended(qname, namespaces): + """ + Converts a QName in prefixed format or a local name to the extended QName format. + + :param qname: a QName in prefixed format or a local name. + :param namespaces: a map from prefixes to namespace URIs. + :return: a QName in extended format or a local name. 
+ """ + try: + if qname[0] == '{' or not namespaces: + return qname + except IndexError: + return qname + + try: + prefix, name = qname.split(':', 1) + except ValueError: + if not namespaces.get(''): + return qname + else: + return '{%s}%s' % (namespaces[''], qname) + else: + try: + uri = namespaces[prefix] + except KeyError: + return qname + else: + return u'{%s}%s' % (uri, name) if uri else name diff --git a/xmlschema/regex.py b/xmlschema/regex.py index 8705c08..11fa8d2 100644 --- a/xmlschema/regex.py +++ b/xmlschema/regex.py @@ -9,31 +9,23 @@ # @author Davide Brunato # """ -Parse and translate XML regular expressions to Python regex syntax. +Parse and translate XML Schema regular expressions to Python regex syntax. """ from __future__ import unicode_literals import re +from itertools import chain from sys import maxunicode from .compat import PY3, unicode_type, string_base_type, MutableSet from .exceptions import XMLSchemaValueError, XMLSchemaRegexError -from .codepoints import UNICODE_CATEGORIES, UNICODE_BLOCKS, UnicodeSubset +from .codepoints import UnicodeSubset, UNICODE_CATEGORIES, unicode_subset +_RE_HYPHENS = re.compile(r'(? 
'1.0') elif part.startswith('\\P'): if self._re_unicode_ref.search(part) is None: raise XMLSchemaValueError("wrong Unicode subset specification %r" % part) - self.negative |= get_unicode_subset(part[3:-1]) + self.negative |= unicode_subset(part[3:-1], self.xsd_version > '1.0') else: self.positive.update(part) @@ -183,11 +178,11 @@ class XsdRegexCharGroup(MutableSet): elif part.startswith('\\p'): if self._re_unicode_ref.search(part) is None: raise XMLSchemaValueError("wrong Unicode subset specification %r" % part) - self.positive -= get_unicode_subset(part[3:-1]) + self.positive -= unicode_subset(part[3:-1], self.xsd_version > '1.0') elif part.startswith('\\P'): if self._re_unicode_ref.search(part) is None: raise XMLSchemaValueError("wrong Unicode subset specification %r" % part) - self.negative -= get_unicode_subset(part[3:-1]) + self.negative -= unicode_subset(part[3:-1], self.xsd_version > '1.0') else: self.positive.difference_update(part) @@ -199,13 +194,15 @@ class XsdRegexCharGroup(MutableSet): self.positive, self.negative = self.negative, self.positive -def parse_character_class(xml_regex, class_pos): +def parse_character_class(xml_regex, class_pos, xsd_version='1.0'): """ Parses a character class of an XML Schema regular expression. :param xml_regex: the source XML Schema regular expression. :param class_pos: the position of the character class in the source string, \ must coincide with a '[' character. + :param xsd_version: the version of the XML Schema processor ('1.0' or '1.1') \ + that called the regular expression parsing. :return: an `XsdRegexCharGroup` instance and the first position after the character class. 
""" if xml_regex[class_pos] != '[': @@ -226,8 +223,15 @@ def parse_character_class(xml_regex, class_pos): pos += 2 elif xml_regex[pos] == ']' or xml_regex[pos:pos + 2] == '-[': if pos == group_pos: - raise XMLSchemaRegexError("empty character class at position %d: %r" % (class_pos, xml_regex)) - char_group = XsdRegexCharGroup(xml_regex[group_pos:pos]) + raise XMLSchemaRegexError( + "empty character class at position %d: %r" % (class_pos, xml_regex) + ) + if _RE_HYPHENS.search(xml_regex[group_pos:pos]) and pos - group_pos > 2: + raise XMLSchemaRegexError( + "invalid character range '--' at position %d: %r" % (class_pos, xml_regex) + ) + + char_group = XsdRegexCharGroup(xsd_version, xml_regex[group_pos:pos]) if negative: char_group.complement() break @@ -240,15 +244,21 @@ def parse_character_class(xml_regex, class_pos): subtracted_group, pos = parse_character_class(xml_regex, pos) pos += 1 if xml_regex[pos] != ']': - raise XMLSchemaRegexError("unterminated character group at position %d: %r" % (class_pos, xml_regex)) + raise XMLSchemaRegexError( + "unterminated character group at position %d: %r" % (class_pos, xml_regex) + ) char_group -= subtracted_group return char_group, pos -def get_python_regex(xml_regex): +def get_python_regex(xml_regex, xsd_version='1.0'): """ Translates an XML regex expression to a Python compatible expression. + + :param xml_regex: the source XML Schema regular expression. + :param xsd_version: the version of the XML Schema processor ('1.0' or '1.1') \ + that called the regular expression parsing. 
""" regex = ['^('] pos = 0 @@ -269,7 +279,7 @@ def get_python_regex(xml_regex): regex.append(r'\%s' % ch) elif ch == '[': try: - char_group, pos = parse_character_class(xml_regex, pos) + char_group, pos = parse_character_class(xml_regex, pos, xsd_version) except IndexError: raise XMLSchemaRegexError( "unterminated character group at position %d: %r" % (pos, xml_regex) @@ -340,7 +350,7 @@ def get_python_regex(xml_regex): raise XMLSchemaRegexError( "truncated unicode block escape at position %d: %r" % (block_pos, xml_regex)) - p_shortcut_set = get_unicode_subset(xml_regex[block_pos + 3:pos]) + p_shortcut_set = unicode_subset(xml_regex[block_pos + 3:pos], xsd_version > '1.0') if xml_regex[block_pos + 1] == 'p': regex.append('[%s]' % p_shortcut_set) else: diff --git a/xmlschema/resources.py b/xmlschema/resources.py index be9f1e8..fa9e168 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -18,9 +18,9 @@ from .compat import ( pathname2url, URLError, uses_relative ) from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaURLError, XMLSchemaOSError +from .namespaces import get_namespace from .qnames import XSI_SCHEMA_LOCATION, XSI_NONS_SCHEMA_LOCATION -from .helpers import get_namespace -from .etree import ElementTree, PyElementTree, SafeXMLParser, is_etree_element, etree_tostring +from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring DEFUSE_MODES = ('always', 'remote', 'never') @@ -285,7 +285,7 @@ class XMLResource(object): def _fromsource(self, source): url, lazy = None, self._lazy - if is_etree_element(source): + if hasattr(source, 'tag'): self._lazy = False return source, None, None, None, None # Source is already an Element --> nothing to load elif isinstance(source, string_base_type): @@ -335,7 +335,7 @@ class XMLResource(object): except (AttributeError, TypeError): pass else: - if is_etree_element(root): + if hasattr(root, 'tag'): self._lazy = False return root, source, None, None, None diff --git 
a/xmlschema/tests/__init__.py b/xmlschema/tests/__init__.py index 158459b..9190c32 100644 --- a/xmlschema/tests/__init__.py +++ b/xmlschema/tests/__init__.py @@ -20,16 +20,11 @@ import xmlschema from xmlschema import XMLSchema from xmlschema.compat import urlopen, URLError, unicode_type from xmlschema.exceptions import XMLSchemaValueError -from xmlschema.etree import ( - is_etree_element, etree_element, etree_register_namespace, etree_elements_assert_equal -) -from xmlschema.resources import fetch_namespaces from xmlschema.qnames import XSD_SCHEMA -from xmlschema.helpers import get_namespace -from xmlschema.namespaces import XSD_NAMESPACE - -from .schema_observers import SchemaObserver -from .test_factory import tests_factory +from xmlschema.namespaces import XSD_NAMESPACE, get_namespace +from xmlschema.etree import etree_element, etree_register_namespace, etree_elements_assert_equal +from xmlschema.resources import fetch_namespaces +from xmlschema.helpers import is_etree_element def has_network_access(*locations): @@ -44,30 +39,38 @@ def has_network_access(*locations): SKIP_REMOTE_TESTS = not has_network_access('http://www.sissa.it', 'http://www.w3.org/', 'http://dublincore.org/') -PROTECTED_PREFIX_PATTERN = re.compile(r'ns\d:') +PROTECTED_PREFIX_PATTERN = re.compile(r'\bns\d:') +TEST_CASES_DIR = os.path.join(os.path.dirname(__file__), 'test_cases/') +SCHEMA_TEMPLATE = """ + + {1} +""" + + +def casepath(relative_path): + """ + Returns the absolute path from a relative path specified from the `xmlschema/tests/test_cases/` dir. 
+ """ + return os.path.join(TEST_CASES_DIR, relative_path) def print_test_header(): + """Print an header thar displays Python version and platform used for test session.""" header1 = "Test %r" % xmlschema header2 = "with Python {} on platform {}".format(platform.python_version(), platform.platform()) print('{0}\n{1}\n{2}\n{0}'.format("*" * max(len(header1), len(header2)), header1, header2)) -class XMLSchemaTestCase(unittest.TestCase): +class XsdValidatorTestCase(unittest.TestCase): """ - XMLSchema TestCase class. + TestCase class for XSD validators. + """ + @classmethod + def casepath(cls, relative_path): + return casepath(relative_path) - Setup tests common environment. The tests parts have to use empty prefix for - XSD namespace names and 'ns' prefix for XMLSchema test namespace names. - """ - test_cases_dir = os.path.join(os.path.dirname(__file__), 'test_cases/') - etree_register_namespace(prefix='', uri=XSD_NAMESPACE) + etree_register_namespace(prefix='xs', uri=XSD_NAMESPACE) etree_register_namespace(prefix='ns', uri="ns") - SCHEMA_TEMPLATE = """ - - {1} - """ schema_class = XMLSchema @@ -83,36 +86,27 @@ class XMLSchemaTestCase(unittest.TestCase): 'ns': 'ns', } - cls.vh_dir = cls.casepath('examples/vehicles') - cls.vh_xsd_file = cls.casepath('examples/vehicles/vehicles.xsd') - cls.vh_xml_file = cls.casepath('examples/vehicles/vehicles.xml') - cls.vh_json_file = cls.casepath('examples/vehicles/vehicles.json') + cls.vh_dir = casepath('examples/vehicles') + cls.vh_xsd_file = casepath('examples/vehicles/vehicles.xsd') + cls.vh_xml_file = casepath('examples/vehicles/vehicles.xml') + cls.vh_json_file = casepath('examples/vehicles/vehicles.json') cls.vh_schema = cls.schema_class(cls.vh_xsd_file) cls.vh_namespaces = fetch_namespaces(cls.vh_xml_file) - cls.col_dir = cls.casepath('examples/collection') - cls.col_xsd_file = cls.casepath('examples/collection/collection.xsd') - cls.col_xml_file = cls.casepath('examples/collection/collection.xml') - cls.col_json_file = 
cls.casepath('examples/collection/collection.json') + cls.col_dir = casepath('examples/collection') + cls.col_xsd_file = casepath('examples/collection/collection.xsd') + cls.col_xml_file = casepath('examples/collection/collection.xml') + cls.col_json_file = casepath('examples/collection/collection.json') cls.col_schema = cls.schema_class(cls.col_xsd_file) cls.col_namespaces = fetch_namespaces(cls.col_xml_file) - cls.st_xsd_file = cls.casepath('features/decoder/simple-types.xsd') + cls.st_xsd_file = casepath('features/decoder/simple-types.xsd') cls.st_schema = cls.schema_class(cls.st_xsd_file) - cls.models_xsd_file = cls.casepath('features/models/models.xsd') + cls.models_xsd_file = casepath('features/models/models.xsd') cls.models_schema = cls.schema_class(cls.models_xsd_file) - @classmethod - def casepath(cls, path): - """ - Returns the absolute path of a test case file. - - :param path: the relative path of the case file from base dir ``xmlschema/tests/test_cases/``. - """ - return os.path.join(cls.test_cases_dir, path) - - def retrieve_schema_source(self, source): + def get_schema_source(self, source): """ Returns a schema source that can be used to create an XMLSchema instance. @@ -129,9 +123,7 @@ class XMLSchemaTestCase(unittest.TestCase): raise XMLSchemaValueError("% is not an XSD global definition/declaration." 
% source) root = etree_element('schema', attrib={ - 'xmlns:ns': "ns", - 'xmlns': "http://www.w3.org/2001/XMLSchema", - 'targetNamespace': "ns", + 'xmlns:xs': "http://www.w3.org/2001/XMLSchema", 'elementFormDefault': "qualified", 'version': self.schema_class.XSD_VERSION, }) @@ -140,18 +132,20 @@ class XMLSchemaTestCase(unittest.TestCase): else: source = source.strip() if not source.startswith('<'): - return self.casepath(source) + return casepath(source) + elif source.startswith(''.format( + source = ''.format( name, ' '.join('%s="%s"' % (k, v) for k, v in attrib.items()) ) - schema = self.schema_class(self.retrieve_schema_source(source)) + schema = self.schema_class(self.get_schema_source(source)) return schema.elements[name] def check_etree_elements(self, elem, other): @@ -168,6 +162,23 @@ class XMLSchemaTestCase(unittest.TestCase): msg = "Protected prefix {!r} found:\n {}".format(match.group(0), s) self.assertIsNone(match, msg) + def check_schema(self, source, expected=None, **kwargs): + """ + Create a schema for a test case. + + :param source: A relative path or a root Element or a portion of schema for a template. + :param expected: If it's an Exception class test the schema for raise an error. \ + Otherwise build the schema and test a condition if expected is a callable, or make \ + a substring test if it's not `None` (maybe a string). Then returns the schema instance. 
+ """ + if isinstance(expected, type) and issubclass(expected, Exception): + self.assertRaises(expected, self.schema_class, self.get_schema_source(source), **kwargs) + else: + schema = self.schema_class(self.get_schema_source(source), **kwargs) + if callable(expected): + self.assertTrue(expected(schema)) + return schema + def check_errors(self, path, expected): """ Checks schema or validation errors, checking information completeness of the diff --git a/xmlschema/tests/check_memory.py b/xmlschema/tests/check_memory.py index dc2e44d..4a0c936 100755 --- a/xmlschema/tests/check_memory.py +++ b/xmlschema/tests/check_memory.py @@ -10,7 +10,7 @@ # @author Davide Brunato # """ -Check xmlschema package import memory usage. +Check xmlschema package memory usage. Refs: https://pypi.org/project/memory_profiler/ @@ -47,8 +47,16 @@ parser.add_argument('xml_file', metavar='XML_FILE', nargs='?', help='Input XML f args = parser.parse_args() +# noinspection PyUnresolvedReferences @profile def import_package(): + # Imports of packages used by xmlschema that + # have a significant memory usage impact. 
+ import decimal + from urllib.error import URLError + import lxml.etree + import elementpath + import xmlschema return xmlschema @@ -128,13 +136,17 @@ if __name__ == '__main__': etree_emptied_iterparse(args.xml_file) elif args.test_num == 5: import xmlschema + xmlschema.XMLSchema.meta_schema.build() decode(args.xml_file) elif args.test_num == 6: import xmlschema + xmlschema.XMLSchema.meta_schema.build() lazy_decode(args.xml_file) elif args.test_num == 7: import xmlschema + xmlschema.XMLSchema.meta_schema.build() validate(args.xml_file) elif args.test_num == 8: import xmlschema + xmlschema.XMLSchema.meta_schema.build() lazy_validate(args.xml_file) diff --git a/xmlschema/tests/test_all.py b/xmlschema/tests/test_all.py index 98c43b6..d031516 100644 --- a/xmlschema/tests/test_all.py +++ b/xmlschema/tests/test_all.py @@ -10,16 +10,33 @@ # @author Davide Brunato # if __name__ == '__main__': - from xmlschema.tests.test_helpers import * - from xmlschema.tests.test_meta import * - from xmlschema.tests.test_regex import * - from xmlschema.tests.test_xpath import * - from xmlschema.tests.test_resources import * - from xmlschema.tests.test_models import * - from xmlschema.tests.test_schemas import * - from xmlschema.tests.test_validators import * - from xmlschema.tests.test_package import * + import unittest + import os + from xmlschema.tests import print_test_header + from xmlschema.tests import test_cases, test_etree, test_helpers, \ + test_meta, test_models, test_regex, test_resources, test_xpath + from xmlschema.tests.validation import test_validation, test_decoding, test_encoding + + def load_tests(loader, tests, pattern): + tests.addTests(loader.loadTestsFromModule(test_cases)) + + validators_dir = os.path.join(os.path.dirname(__file__), 'validators') + tests.addTests(loader.discover(start_dir=validators_dir, pattern=pattern or 'test_*.py')) + + tests.addTests(loader.loadTestsFromModule(test_validation)) + tests.addTests(loader.loadTestsFromModule(test_decoding)) + 
tests.addTests(loader.loadTestsFromModule(test_encoding)) + + tests.addTests(loader.loadTestsFromModule(test_etree)) + tests.addTests(loader.loadTestsFromModule(test_helpers)) + tests.addTests(loader.loadTestsFromModule(test_meta)) + tests.addTests(loader.loadTestsFromModule(test_models)) + tests.addTests(loader.loadTestsFromModule(test_regex)) + tests.addTests(loader.loadTestsFromModule(test_resources)) + tests.addTests(loader.loadTestsFromModule(test_xpath)) + + return tests print_test_header() unittest.main() diff --git a/xmlschema/tests/test_cases/__init__.py b/xmlschema/tests/test_cases/__init__.py new file mode 100644 index 0000000..ab1b2de --- /dev/null +++ b/xmlschema/tests/test_cases/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +""" +Creates the test classes for cases when unittest's discovery loads this subpackage. 
+""" +from xmlschema.tests.test_factory import tests_factory, \ + make_schema_test_class, make_validator_test_class + +# Creates schema tests from XSD files +globals().update(tests_factory(make_schema_test_class, 'xsd')) + +# Creates schema tests from XML files +globals().update(tests_factory(make_validator_test_class, 'xml')) diff --git a/xmlschema/tests/test_cases/features/models/models.xsd b/xmlschema/tests/test_cases/features/models/models.xsd index 60eccc3..b9edc82 100644 --- a/xmlschema/tests/test_cases/features/models/models.xsd +++ b/xmlschema/tests/test_cases/features/models/models.xsd @@ -119,4 +119,16 @@ + + + + + + + + + + + + diff --git a/xmlschema/tests/test_cases/features/patterns/patterns.xml b/xmlschema/tests/test_cases/features/patterns/patterns.xml index f7abdf7..d541b43 100644 --- a/xmlschema/tests/test_cases/features/patterns/patterns.xml +++ b/xmlschema/tests/test_cases/features/patterns/patterns.xml @@ -15,4 +15,6 @@ 2015-12-31T13:32:26-02:00 2015-12-31T13:32:26+02:00 5067746900909 + abc + . diff --git a/xmlschema/tests/test_cases/features/patterns/patterns.xsd b/xmlschema/tests/test_cases/features/patterns/patterns.xsd index f7a9fa0..49d2205 100644 --- a/xmlschema/tests/test_cases/features/patterns/patterns.xsd +++ b/xmlschema/tests/test_cases/features/patterns/patterns.xsd @@ -11,6 +11,8 @@ + + @@ -70,4 +72,17 @@ + + + + + + + + + + + + + diff --git a/xmlschema/tests/test_etree.py b/xmlschema/tests/test_etree.py new file mode 100644 index 0000000..e039181 --- /dev/null +++ b/xmlschema/tests/test_etree.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +"""Tests for ElementTree import and for a pure-python version with a safe parser.""" +import unittest +import os +import importlib +import sys +import subprocess + + +@unittest.skipIf(sys.version_info < (3,), "In Python 2 ElementTree is not overwritten by cElementTree") +class TestElementTree(unittest.TestCase): + + def test_element_string_serialization(self): + ElementTree = importlib.import_module('xml.etree.ElementTree') + xmlschema_etree = importlib.import_module('xmlschema.etree') + + elem = ElementTree.Element('element') + self.assertEqual(xmlschema_etree.etree_tostring(elem), '') + elem = xmlschema_etree.ElementTree.Element('element') + self.assertEqual(xmlschema_etree.etree_tostring(elem), '') + elem = xmlschema_etree.PyElementTree.Element('element') + self.assertEqual(xmlschema_etree.etree_tostring(elem), '') + + def test_import_element_tree_before(self): + ElementTree = importlib.import_module('xml.etree.ElementTree') + xmlschema_etree = importlib.import_module('xmlschema.etree') + + self.assertIsNot(ElementTree.Element, ElementTree._Element_Py, msg="cElementTree not available!") + elem = xmlschema_etree.PyElementTree.Element('element') + self.assertEqual(xmlschema_etree.etree_tostring(elem), '') + self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree) + self.assertIs(xmlschema_etree.ElementTree, ElementTree) + + def test_import_element_tree_after(self): + xmlschema_etree = importlib.import_module('xmlschema.etree') + ElementTree = importlib.import_module('xml.etree.ElementTree') + + self.assertIsNot(ElementTree.Element, ElementTree._Element_Py, msg="cElementTree not available!") + elem = xmlschema_etree.PyElementTree.Element('element') + self.assertEqual(xmlschema_etree.etree_tostring(elem), '') + self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree) + self.assertIs(xmlschema_etree.ElementTree, ElementTree) + + def test_element_tree_import_script(self): + test_dir = 
os.path.dirname(__file__) or '.' + + cmd = [os.path.join(test_dir, 'check_etree_import.py')] + process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output = process.stdout.decode('utf-8') + self.assertTrue("\nTest OK:" in output, msg="Wrong import of ElementTree after xmlschema") + + cmd.append('--before') + process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output = process.stdout.decode('utf-8') + self.assertTrue("\nTest OK:" in output, msg="Wrong import of ElementTree before xmlschema") + + def test_safe_xml_parser(self): + test_dir = os.path.dirname(__file__) or '.' + xmlschema_etree = importlib.import_module('xmlschema.etree') + parser = xmlschema_etree.SafeXMLParser(target=xmlschema_etree.PyElementTree.TreeBuilder()) + PyElementTree = xmlschema_etree.PyElementTree + + xml_file = os.path.join(test_dir, 'test_cases/resources/with_entity.xml') + elem = xmlschema_etree.ElementTree.parse(xml_file).getroot() + self.assertEqual(elem.text, 'abc') + self.assertRaises( + PyElementTree.ParseError, xmlschema_etree.ElementTree.parse, xml_file, parser=parser + ) + + xml_file = os.path.join(test_dir, 'test_cases/resources/unused_external_entity.xml') + elem = xmlschema_etree.ElementTree.parse(xml_file).getroot() + self.assertEqual(elem.text, 'abc') + self.assertRaises( + PyElementTree.ParseError, xmlschema_etree.ElementTree.parse, xml_file, parser=parser + ) + + xml_file = os.path.join(test_dir, 'test_cases/resources/external_entity.xml') + self.assertRaises(xmlschema_etree.ParseError, xmlschema_etree.ElementTree.parse, xml_file) + self.assertRaises( + PyElementTree.ParseError, xmlschema_etree.ElementTree.parse, xml_file, parser=parser + ) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/test_factory/__init__.py b/xmlschema/tests/test_factory/__init__.py new file mode 100644 index 0000000..ad56606 --- /dev/null +++ 
b/xmlschema/tests/test_factory/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +""" +Test factory subpackage for creating test cases from lists of paths to XSD or XML files. + +The list of cases can be defined within files named "testfiles". These are text files +that contain a list of relative paths to XSD or XML files, that are used to dynamically +build a set of test classes. Each path is followed by a list of options that defines a +custom setting for each test. +""" +from .arguments import TEST_FACTORY_OPTIONS, xsd_version_number, create_test_line_args_parser +from .factory import tests_factory +from .observers import SchemaObserver, ObservedXMLSchema10, ObservedXMLSchema11 +from .schema_tests import make_schema_test_class +from .validation_tests import make_validator_test_class diff --git a/xmlschema/tests/test_factory.py b/xmlschema/tests/test_factory/arguments.py similarity index 54% rename from xmlschema/tests/test_factory.py rename to xmlschema/tests/test_factory/arguments.py index 93c8f65..95ff4c2 100644 --- a/xmlschema/tests/test_factory.py +++ b/xmlschema/tests/test_factory/arguments.py @@ -18,17 +18,7 @@ custom setting for each test.
""" import sys import re -import os -import glob -import fileinput import argparse -import logging - -from xmlschema.validators import XMLSchema10, XMLSchema11 -from .schema_observers import ObservedXMLSchema10, ObservedXMLSchema11 - -logger = logging.getLogger(__file__) - TEST_FACTORY_OPTIONS = { 'extra_cases': '-x' in sys.argv or '--extra' in sys.argv, # Include extra test cases @@ -108,86 +98,3 @@ def create_test_line_args_parser(): help="Activate the debug mode (only the cases with --debug are executed).", ) return parser - - -test_line_parser = create_test_line_args_parser() - - -def tests_factory(test_class_builder, suffix='xml'): - """ - Factory function for file based schema/validation cases. - - :param test_class_builder: the test class builder function. - :param suffix: the suffix ('xml' or 'xsd') to consider for cases. - :return: a list of test classes. - """ - test_classes = {} - test_num = 0 - debug_mode = False - line_buffer = [] - - test_dir = os.path.dirname(os.path.abspath(__file__)) - testfiles = [os.path.join(test_dir, 'test_cases/testfiles')] - if TEST_FACTORY_OPTIONS['extra_cases'] and test_dir != os.getcwd(): - testfiles.extend(glob.glob(os.path.join(os.getcwd(), 'test_cases/testfiles'))) - - for line in fileinput.input(testfiles): - line = line.strip() - if not line or line[0] == '#': - if not line_buffer: - continue - else: - raise SyntaxError("Empty continuation at line %d!" 
% fileinput.filelineno()) - elif '#' in line: - line = line.split('#', 1)[0].rstrip() - - # Process line continuations - if line[-1] == '\\': - line_buffer.append(line[:-1].strip()) - continue - elif line_buffer: - line_buffer.append(line) - line = ' '.join(line_buffer) - del line_buffer[:] - - test_args = test_line_parser.parse_args(get_test_args(line)) - if test_args.locations is not None: - test_args.locations = {k.strip('\'"'): v for k, v in test_args.locations} - - test_file = os.path.join(os.path.dirname(fileinput.filename()), test_args.filename) - if os.path.isdir(test_file): - logger.debug("Skip %s: is a directory.", test_file) - continue - elif os.path.splitext(test_file)[1].lower() != '.%s' % suffix: - logger.debug("Skip %s: wrong suffix.", test_file) - continue - elif not os.path.isfile(test_file): - logger.error("Skip %s: is not a file.", test_file) - continue - - test_num += 1 - - # Debug mode activation - if debug_mode: - if not test_args.debug: - continue - elif test_args.debug: - debug_mode = True - logger.debug("Debug mode activated: discard previous %r test classes.", len(test_classes)) - test_classes.clear() - - if test_args.version == '1.0': - schema_class = ObservedXMLSchema10 if test_args.inspect else XMLSchema10 - check_with_lxml = TEST_FACTORY_OPTIONS['check_with_lxml'] - else: - schema_class = ObservedXMLSchema11 if test_args.inspect else XMLSchema11 - check_with_lxml = False - - test_class = test_class_builder(test_file, test_args, test_num, schema_class, check_with_lxml) - test_classes[test_class.__name__] = test_class - logger.debug("Add XSD %s test class %r.", test_args.version, test_class.__name__) - - if line_buffer: - raise ValueError("Not completed line continuation at the end!") - - return test_classes diff --git a/xmlschema/tests/test_factory/factory.py b/xmlschema/tests/test_factory/factory.py new file mode 100644 index 0000000..53e3700 --- /dev/null +++ b/xmlschema/tests/test_factory/factory.py @@ -0,0 +1,104 @@ +# -*- coding: 
utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +import os +import glob +import fileinput +import logging + +from xmlschema.validators import XMLSchema10, XMLSchema11 +from .arguments import TEST_FACTORY_OPTIONS, get_test_args, create_test_line_args_parser +from .observers import ObservedXMLSchema10, ObservedXMLSchema11 + +logger = logging.getLogger(__file__) + + +test_line_parser = create_test_line_args_parser() + + +def tests_factory(test_class_builder, suffix='xml'): + """ + Factory function for file based schema/validation cases. + + :param test_class_builder: the test class builder function. + :param suffix: the suffix ('xml' or 'xsd') to consider for cases. + :return: a list of test classes. + """ + test_classes = {} + test_num = 0 + debug_mode = False + line_buffer = [] + + test_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + testfiles = [os.path.join(test_dir, 'test_cases/testfiles')] + if TEST_FACTORY_OPTIONS['extra_cases']: + package_dir = os.path.dirname(os.path.dirname(test_dir)) + testfiles.extend(glob.glob(os.path.join(package_dir, 'test_cases/testfiles'))) + + for line in fileinput.input(testfiles): + line = line.strip() + if not line or line[0] == '#': + if not line_buffer: + continue + else: + raise SyntaxError("Empty continuation at line %d!" 
% fileinput.filelineno()) + elif '#' in line: + line = line.split('#', 1)[0].rstrip() + + # Process line continuations + if line[-1] == '\\': + line_buffer.append(line[:-1].strip()) + continue + elif line_buffer: + line_buffer.append(line) + line = ' '.join(line_buffer) + del line_buffer[:] + + test_args = test_line_parser.parse_args(get_test_args(line)) + if test_args.locations is not None: + test_args.locations = {k.strip('\'"'): v for k, v in test_args.locations} + + test_file = os.path.join(os.path.dirname(fileinput.filename()), test_args.filename) + if os.path.isdir(test_file): + logger.debug("Skip %s: is a directory.", test_file) + continue + elif os.path.splitext(test_file)[1].lower() != '.%s' % suffix: + logger.debug("Skip %s: wrong suffix.", test_file) + continue + elif not os.path.isfile(test_file): + logger.error("Skip %s: is not a file.", test_file) + continue + + test_num += 1 + + # Debug mode activation + if debug_mode: + if not test_args.debug: + continue + elif test_args.debug: + debug_mode = True + logger.debug("Debug mode activated: discard previous %r test classes.", len(test_classes)) + test_classes.clear() + + if test_args.version == '1.0': + schema_class = ObservedXMLSchema10 if test_args.inspect else XMLSchema10 + check_with_lxml = TEST_FACTORY_OPTIONS['check_with_lxml'] + else: + schema_class = ObservedXMLSchema11 if test_args.inspect else XMLSchema11 + check_with_lxml = False + + test_class = test_class_builder(test_file, test_args, test_num, schema_class, check_with_lxml) + test_classes[test_class.__name__] = test_class + logger.debug("Add XSD %s test class %r.", test_args.version, test_class.__name__) + + if line_buffer: + raise ValueError("Not completed line continuation at the end!") + + return test_classes diff --git a/xmlschema/tests/schema_observers.py b/xmlschema/tests/test_factory/observers.py similarity index 100% rename from xmlschema/tests/schema_observers.py rename to xmlschema/tests/test_factory/observers.py diff --git 
a/xmlschema/tests/test_factory/schema_tests.py b/xmlschema/tests/test_factory/schema_tests.py new file mode 100644 index 0000000..6796ef3 --- /dev/null +++ b/xmlschema/tests/test_factory/schema_tests.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import pdb +import os +import pickle +import time +import logging +import warnings + +from xmlschema import XMLSchemaBase +from xmlschema.compat import PY3, unicode_type +from xmlschema.etree import lxml_etree, py_etree_element +from xmlschema.xpath import XMLSchemaContext +from xmlschema.validators import XsdValidator + +from xmlschema.tests import XsdValidatorTestCase +from .observers import SchemaObserver + + +def make_schema_test_class(test_file, test_args, test_num, schema_class, check_with_lxml): + """ + Creates a schema test class. + + :param test_file: the schema test file path. + :param test_args: line arguments for test case. + :param test_num: a positive integer number associated with the test case. + :param schema_class: the schema class to use. + :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \ + Works only for XSD 1.0 tests. 
+ """ + xsd_file = os.path.relpath(test_file) + + # Extract schema test arguments + expected_errors = test_args.errors + expected_warnings = test_args.warnings + inspect = test_args.inspect + locations = test_args.locations + defuse = test_args.defuse + debug_mode = test_args.debug + loglevel = logging.DEBUG if debug_mode else None + + class TestSchema(XsdValidatorTestCase): + + @classmethod + def setUpClass(cls): + cls.schema_class = schema_class + cls.errors = [] + cls.longMessage = True + + if debug_mode: + print("\n##\n## Testing %r schema in debug mode.\n##" % xsd_file) + pdb.set_trace() + + def check_xsd_file(self): + if expected_errors > 0: + xs = schema_class(xsd_file, validation='lax', locations=locations, + defuse=defuse, loglevel=loglevel) + else: + xs = schema_class(xsd_file, locations=locations, defuse=defuse, loglevel=loglevel) + self.errors.extend(xs.maps.all_errors) + + if inspect: + components_ids = set([id(c) for c in xs.maps.iter_components()]) + missing = [c for c in SchemaObserver.components if id(c) not in components_ids] + if any(c for c in missing): + raise ValueError("schema missing %d components: %r" % (len(missing), missing)) + + # Pickling test (only for Python 3, skip inspected schema classes test) + if not inspect and PY3: + try: + obj = pickle.dumps(xs) + deserialized_schema = pickle.loads(obj) + except pickle.PicklingError: + # Don't raise if some schema parts (eg. a schema loaded from remote) + # are built with the SafeXMLParser that uses pure Python elements. 
+ for e in xs.maps.iter_components(): + elem = getattr(e, 'elem', getattr(e, 'root', None)) + if isinstance(elem, py_etree_element): + break + else: + raise + else: + self.assertTrue(isinstance(deserialized_schema, XMLSchemaBase)) + self.assertEqual(xs.built, deserialized_schema.built) + + # XPath API tests + if not inspect and not self.errors: + context = XMLSchemaContext(xs) + elements = [x for x in xs.iter()] + context_elements = [x for x in context.iter() if isinstance(x, XsdValidator)] + self.assertEqual(context_elements, [x for x in context.iter_descendants()]) + self.assertEqual(context_elements, elements) + + def check_xsd_file_with_lxml(self, xmlschema_time): + start_time = time.time() + lxs = lxml_etree.parse(xsd_file) + try: + lxml_etree.XMLSchema(lxs.getroot()) + except lxml_etree.XMLSchemaParseError as err: + if not self.errors: + print("\nSchema error with lxml.etree.XMLSchema for file {!r} ({}): {}".format( + xsd_file, self.__class__.__name__, unicode_type(err) + )) + else: + if self.errors: + print("\nUnrecognized errors with lxml.etree.XMLSchema for file {!r} ({}): {}".format( + xsd_file, self.__class__.__name__, + '\n++++++\n'.join([unicode_type(e) for e in self.errors]) + )) + lxml_schema_time = time.time() - start_time + if lxml_schema_time >= xmlschema_time: + print( + "\nSlower lxml.etree.XMLSchema ({:.3f}s VS {:.3f}s) with file {!r} ({})".format( + lxml_schema_time, xmlschema_time, xsd_file, self.__class__.__name__ + )) + + def test_xsd_file(self): + if inspect: + SchemaObserver.clear() + del self.errors[:] + + start_time = time.time() + if expected_warnings > 0: + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter("always") + self.check_xsd_file() + self.assertEqual(len(ctx), expected_warnings, + "%r: Wrong number of include/import warnings" % xsd_file) + else: + self.check_xsd_file() + + # Check with lxml.etree.XMLSchema class + if check_with_lxml and lxml_etree is not None: + 
self.check_xsd_file_with_lxml(xmlschema_time=time.time() - start_time) + self.check_errors(xsd_file, expected_errors) + + TestSchema.__name__ = TestSchema.__qualname__ = str('TestSchema{0:03}'.format(test_num)) + return TestSchema diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py new file mode 100644 index 0000000..dfd2d50 --- /dev/null +++ b/xmlschema/tests/test_factory/validation_tests.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +import unittest +import pdb +import os +import sys +import pickle +import warnings + +import xmlschema +from xmlschema import XMLSchemaValidationError, ParkerConverter, \ + BadgerFishConverter, AbderaConverter, JsonMLConverter + +from xmlschema.compat import unicode_type, ordered_dict_class +from xmlschema.etree import etree_tostring, ElementTree, \ + etree_elements_assert_equal, lxml_etree, lxml_etree_element +from xmlschema.qnames import XSI_TYPE +from xmlschema.resources import fetch_namespaces + +from xmlschema.tests import XsdValidatorTestCase +from . import tests_factory + + +def iter_nested_items(items, dict_class=dict, list_class=list): + if isinstance(items, dict_class): + for k, v in items.items(): + for value in iter_nested_items(v, dict_class, list_class): + yield value + elif isinstance(items, list_class): + for item in items: + for value in iter_nested_items(item, dict_class, list_class): + yield value + elif isinstance(items, dict): + raise TypeError("%r: is a dict() instead of %r." % (items, dict_class)) + elif isinstance(items, list): + raise TypeError("%r: is a list() instead of %r." 
% (items, list_class)) + else: + yield items + + +def make_validator_test_class(test_file, test_args, test_num, schema_class, check_with_lxml): + """ + Creates a validator test class. + + :param test_file: the XML test file path. + :param test_args: line arguments for test case. + :param test_num: a positive integer number associated with the test case. + :param schema_class: the schema class to use. + :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \ + Works only for XSD 1.0 tests. + """ + xml_file = os.path.relpath(test_file) + msg_tmpl = "\n\n{}: %s.".format(xml_file) + + # Extract schema test arguments + expected_errors = test_args.errors + expected_warnings = test_args.warnings + inspect = test_args.inspect + locations = test_args.locations + defuse = test_args.defuse + skip_strict = test_args.skip + debug_mode = test_args.debug + + class TestValidator(XsdValidatorTestCase): + + @classmethod + def setUpClass(cls): + # Builds schema instance using 'lax' validation mode to also accept schemas with non-crashing errors.
+ cls.schema_class = schema_class + source, _locations = xmlschema.fetch_schema_locations(xml_file, locations) + cls.schema = schema_class(source, validation='lax', locations=_locations, defuse=defuse) + if check_with_lxml and lxml_etree is not None: + cls.lxml_schema = lxml_etree.parse(source) + + cls.errors = [] + cls.chunks = [] + cls.longMessage = True + + if debug_mode: + print("\n##\n## Testing %r validation in debug mode.\n##" % xml_file) + pdb.set_trace() + + def check_etree_encode(self, root, converter=None, **kwargs): + namespaces = kwargs.get('namespaces', {}) + data1 = self.schema.decode(root, converter=converter, **kwargs) + if isinstance(data1, tuple): + data1 = data1[0] # When validation='lax' + + for _ in iter_nested_items(data1, dict_class=ordered_dict_class): + pass + + try: + elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs) + except XMLSchemaValidationError as err: + raise AssertionError(str(err) + msg_tmpl % "error during re-encoding") + + if isinstance(elem1, tuple): + # When validation='lax' + if converter is not ParkerConverter: + for e in elem1[1]: + self.check_namespace_prefixes(unicode_type(e)) + elem1 = elem1[0] + + # Checks that the encoded element does not contain reserved namespace prefixes + if namespaces and all('ns%d' % k not in namespaces for k in range(10)): + self.check_namespace_prefixes(etree_tostring(elem1, namespaces=namespaces)) + + # Main check: compare original and re-encoded tree + try: + etree_elements_assert_equal(root, elem1, strict=False) + except AssertionError as err: + # If the check fails retry only if the converter is lossy (eg. ParkerConverter) + # or if the XML case has defaults taken from the schema or some part of data + # decoding is skipped by schema wildcards (set the specific argument in testfiles).
+ if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict: + if debug_mode: + pdb.set_trace() + raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original") + elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): + return # can't check encode equivalence if xsi:type is provided + else: + # Lossy or augmenting cases are checked after another decoding/encoding pass + data2 = self.schema.decode(elem1, converter=converter, **kwargs) + if isinstance(data2, tuple): + data2 = data2[0] + + if sys.version_info >= (3, 6): + # For Python < 3.6 cannot ensure attribute decoding order + try: + self.assertEqual(data1, data2, msg_tmpl % "re-decoded data changed") + except AssertionError: + if debug_mode: + pdb.set_trace() + raise + + elem2 = self.schema.encode(data2, path=root.tag, converter=converter, **kwargs) + if isinstance(elem2, tuple): + elem2 = elem2[0] + + try: + etree_elements_assert_equal(elem1, elem2, strict=False) + except AssertionError as err: + if debug_mode: + pdb.set_trace() + raise AssertionError(str(err) + msg_tmpl % "encoded tree differs after second pass") + + def check_json_serialization(self, root, converter=None, **kwargs): + data1 = xmlschema.to_json(root, schema=self.schema, converter=converter, **kwargs) + if isinstance(data1, tuple): + data1 = data1[0] + + elem1 = xmlschema.from_json(data1, schema=self.schema, path=root.tag, converter=converter, **kwargs) + if isinstance(elem1, tuple): + elem1 = elem1[0] + + data2 = xmlschema.to_json(elem1, schema=self.schema, converter=converter, **kwargs) + if isinstance(data2, tuple): + data2 = data2[0] + + if converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): + return # can't check encode equivalence if xsi:type is provided + elif sys.version_info >= (3, 6): + self.assertEqual(data2, data1, msg_tmpl % "serialized data changed at second pass") + else: + elem2 = xmlschema.from_json(data2, 
schema=self.schema, path=root.tag, converter=converter, **kwargs) + if isinstance(elem2, tuple): + elem2 = elem2[0] + try: + self.assertIsNone(etree_elements_assert_equal(elem1, elem2, strict=False, skip_comments=True)) + except AssertionError as err: + self.assertIsNone(err, None) + + def check_decoding_with_element_tree(self): + del self.errors[:] + del self.chunks[:] + + def do_decoding(): + for obj in self.schema.iter_decode(xml_file): + if isinstance(obj, (xmlschema.XMLSchemaDecodeError, xmlschema.XMLSchemaValidationError)): + self.errors.append(obj) + else: + self.chunks.append(obj) + + if expected_warnings == 0: + do_decoding() + else: + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter("always") + do_decoding() + self.assertEqual(len(ctx), expected_warnings, "Wrong number of include/import warnings") + + self.check_errors(xml_file, expected_errors) + + if not self.chunks: + raise ValueError("No decoded object returned!!") + elif len(self.chunks) > 1: + raise ValueError("Too many ({}) decoded objects returned: {}".format(len(self.chunks), self.chunks)) + elif not isinstance(self.chunks[0], dict): + raise ValueError("Decoded object is not a dictionary: {}".format(self.chunks)) + else: + self.assertTrue(True, "Successfully test decoding for {}".format(xml_file)) + + def check_schema_serialization(self): + # Repeat with serialized-deserialized schema (only for Python 3) + serialized_schema = pickle.dumps(self.schema) + deserialized_schema = pickle.loads(serialized_schema) + errors = [] + chunks = [] + for obj in deserialized_schema.iter_decode(xml_file): + if isinstance(obj, xmlschema.XMLSchemaValidationError): + errors.append(obj) + else: + chunks.append(obj) + + self.assertEqual(len(errors), len(self.errors), msg_tmpl % "wrong number errors") + self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data differ") + + def check_decode_api(self): + # Compare with the decode API and other validation modes + strict_data = 
self.schema.decode(xml_file) + lax_data = self.schema.decode(xml_file, validation='lax') + skip_data = self.schema.decode(xml_file, validation='skip') + self.assertEqual(strict_data, self.chunks[0], msg_tmpl % "decode() API has a different result") + self.assertEqual(lax_data[0], self.chunks[0], msg_tmpl % "'lax' validation has a different result") + self.assertEqual(skip_data, self.chunks[0], msg_tmpl % "'skip' validation has a different result") + + def check_encoding_with_element_tree(self): + root = ElementTree.parse(xml_file).getroot() + namespaces = fetch_namespaces(xml_file) + options = {'namespaces': namespaces, 'dict_class': ordered_dict_class} + + self.check_etree_encode(root, cdata_prefix='#', **options) # Default converter + self.check_etree_encode(root, ParkerConverter, validation='lax', **options) + self.check_etree_encode(root, ParkerConverter, validation='skip', **options) + self.check_etree_encode(root, BadgerFishConverter, **options) + self.check_etree_encode(root, AbderaConverter, **options) + self.check_etree_encode(root, JsonMLConverter, **options) + + options.pop('dict_class') + self.check_json_serialization(root, cdata_prefix='#', **options) + self.check_json_serialization(root, ParkerConverter, validation='lax', **options) + self.check_json_serialization(root, ParkerConverter, validation='skip', **options) + self.check_json_serialization(root, BadgerFishConverter, **options) + self.check_json_serialization(root, AbderaConverter, **options) + self.check_json_serialization(root, JsonMLConverter, **options) + + def check_decoding_and_encoding_with_lxml(self): + xml_tree = lxml_etree.parse(xml_file) + namespaces = fetch_namespaces(xml_file) + + errors = [] + chunks = [] + for obj in self.schema.iter_decode(xml_tree, namespaces=namespaces): + if isinstance(obj, xmlschema.XMLSchemaValidationError): + errors.append(obj) + else: + chunks.append(obj) + + self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data change with lxml") + 
self.assertEqual(len(errors), len(self.errors), msg_tmpl % "errors number change with lxml") + + if not errors: + root = xml_tree.getroot() + if namespaces.get(''): + # Add a not empty prefix for encoding to avoid the use of reserved prefix ns0 + namespaces['tns0'] = namespaces[''] + + options = { + 'etree_element_class': lxml_etree_element, + 'namespaces': namespaces, + 'dict_class': ordered_dict_class, + } + self.check_etree_encode(root, cdata_prefix='#', **options) # Default converter + self.check_etree_encode(root, ParkerConverter, validation='lax', **options) + self.check_etree_encode(root, ParkerConverter, validation='skip', **options) + self.check_etree_encode(root, BadgerFishConverter, **options) + self.check_etree_encode(root, AbderaConverter, **options) + self.check_etree_encode(root, JsonMLConverter, **options) + + options.pop('dict_class') + self.check_json_serialization(root, cdata_prefix='#', **options) + self.check_json_serialization(root, ParkerConverter, validation='lax', **options) + self.check_json_serialization(root, ParkerConverter, validation='skip', **options) + self.check_json_serialization(root, BadgerFishConverter, **options) + self.check_json_serialization(root, AbderaConverter, **options) + self.check_json_serialization(root, JsonMLConverter, **options) + + def check_validate_and_is_valid_api(self): + if expected_errors: + self.assertFalse(self.schema.is_valid(xml_file), msg_tmpl % "file with errors is valid") + self.assertRaises(XMLSchemaValidationError, self.schema.validate, xml_file) + else: + self.assertTrue(self.schema.is_valid(xml_file), msg_tmpl % "file without errors is not valid") + self.assertEqual(self.schema.validate(xml_file), None, + msg_tmpl % "file without errors not validated") + + def check_iter_errors(self): + self.assertEqual(len(list(self.schema.iter_errors(xml_file))), expected_errors, + msg_tmpl % "wrong number of errors (%d expected)" % expected_errors) + + def check_lxml_validation(self): + try: + schema = 
lxml_etree.XMLSchema(self.lxml_schema.getroot()) + except lxml_etree.XMLSchemaParseError: + print("\nSkip lxml.etree.XMLSchema validation test for {!r} ({})". + format(xml_file, TestValidator.__name__, )) + else: + xml_tree = lxml_etree.parse(xml_file) + if self.errors: + self.assertFalse(schema.validate(xml_tree)) + else: + self.assertTrue(schema.validate(xml_tree)) + + def test_xml_document_validation(self): + self.check_decoding_with_element_tree() + + if not inspect and sys.version_info >= (3,): + self.check_schema_serialization() + + if not self.errors: + self.check_encoding_with_element_tree() + + if lxml_etree is not None: + self.check_decoding_and_encoding_with_lxml() + + self.check_iter_errors() + self.check_validate_and_is_valid_api() + if check_with_lxml and lxml_etree is not None: + self.check_lxml_validation() + + TestValidator.__name__ = TestValidator.__qualname__ = 'TestValidator{0:03}'.format(test_num) + return TestValidator + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + # Creates decoding/encoding tests classes from XML files + globals().update(tests_factory(make_validator_test_class, 'xml')) + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/test_files.py b/xmlschema/tests/test_files.py index b242ba1..7f687a3 100644 --- a/xmlschema/tests/test_files.py +++ b/xmlschema/tests/test_files.py @@ -17,10 +17,9 @@ import unittest import os import argparse -import xmlschema -from xmlschema.tests.test_factory import xsd_version_number -from xmlschema.tests.test_schemas import make_schema_test_class -from xmlschema.tests.test_validators import make_validator_test_class +from xmlschema import XMLSchema10, XMLSchema11 +from xmlschema.tests.test_factory import xsd_version_number, \ + make_schema_test_class, make_validator_test_class if __name__ == '__main__': @@ -39,27 +38,39 @@ if __name__ == '__main__': ) args = parser.parse_args() - schema_class = xmlschema.XMLSchema10 if args.version == '1.0' else 
xmlschema.validators.XMLSchema11 + if args.version == '1.0': + schema_class = XMLSchema10 + check_with_lxml = True + else: + schema_class = XMLSchema11 + check_with_lxml = False + test_num = 1 test_args = argparse.Namespace( errors=0, warnings=0, inspect=False, locations=(), defuse='never', skip=False, debug=False ) + + test_loader = unittest.TestLoader() test_suite = unittest.TestSuite() for test_file in args.files: if not os.path.isfile(test_file): continue elif test_file.endswith('xsd'): - test_class = make_schema_test_class(test_file, test_args, test_num, schema_class, True) + test_class = make_schema_test_class( + test_file, test_args, test_num, schema_class, check_with_lxml + ) test_num += 1 elif test_file.endswith('xml'): - test_class = make_validator_test_class(test_file, test_args, test_num, schema_class, True) + test_class = make_validator_test_class( + test_file, test_args, test_num, schema_class, check_with_lxml + ) test_num += 1 else: continue print("Add test %r for file %r ..." % (test_class.__name__, test_file)) - test_suite.addTest(test_class('run')) + test_suite.addTest(test_loader.loadTestsFromTestCase(test_class)) if test_num == 1: print("No XSD or XML file to test, exiting ...") diff --git a/xmlschema/tests/test_helpers.py b/xmlschema/tests/test_helpers.py index a95522e..be195ef 100644 --- a/xmlschema/tests/test_helpers.py +++ b/xmlschema/tests/test_helpers.py @@ -15,17 +15,26 @@ This module runs tests on various internal helper functions. 
from __future__ import unicode_literals import unittest +import decimal +import xml.etree.ElementTree as ElementTree -from xmlschema.etree import etree_element -from xmlschema.namespaces import XSD_NAMESPACE, XSI_NAMESPACE -from xmlschema.helpers import get_xsd_annotation, iter_xsd_components, get_namespace, get_qname, \ - local_name, qname_to_prefixed, has_xsd_components, get_xsd_component, \ - get_xml_bool_attribute, get_xsd_derivation_attribute +from xmlschema import XMLSchema, XMLSchemaParseError +from xmlschema.etree import etree_element, prune_etree +from xmlschema.namespaces import XSD_NAMESPACE, XSI_NAMESPACE, get_namespace from xmlschema.qnames import XSI_TYPE, XSD_SCHEMA, XSD_ELEMENT, XSD_SIMPLE_TYPE, XSD_ANNOTATION -from xmlschema.tests import XMLSchemaTestCase +from xmlschema.qnames import get_qname, local_name, qname_to_prefixed +from xmlschema.helpers import get_xsd_annotation, get_xsd_derivation_attribute, count_digits -class TestHelpers(XMLSchemaTestCase): +class TestHelpers(unittest.TestCase): + + @classmethod + def setUpClass(cls): + XMLSchema.meta_schema.build() + + @classmethod + def tearDownClass(cls): + XMLSchema.meta_schema.clear() def test_get_namespace_function(self): self.assertEqual(get_namespace(XSD_SIMPLE_TYPE), XSD_NAMESPACE) @@ -90,79 +99,6 @@ class TestHelpers(XMLSchemaTestCase): elem.append(etree_element(XSD_ANNOTATION)) self.assertIsNone(get_xsd_annotation(elem)) - def test_iter_xsd_components(self): - elem = etree_element(XSD_SCHEMA) - self.assertFalse(list(iter_xsd_components(elem))) - self.assertFalse(list(iter_xsd_components(elem, start=1))) - elem.append(etree_element(XSD_ANNOTATION)) - self.assertFalse(list(iter_xsd_components(elem))) - self.assertFalse(list(iter_xsd_components(elem, start=1))) - elem.append(etree_element(XSD_ELEMENT)) - self.assertEqual(list(iter_xsd_components(elem)), [elem[1]]) - elem.append(etree_element(XSD_SIMPLE_TYPE)) - self.assertEqual(list(iter_xsd_components(elem)), elem[1:]) - 
self.assertEqual(list(iter_xsd_components(elem, start=1)), [elem[2]]) - elem.append(etree_element(XSD_ANNOTATION)) - self.assertRaises(ValueError, list, iter_xsd_components(elem)) - - def test_has_xsd_components(self): - elem = etree_element(XSD_SCHEMA) - elem.append(etree_element(XSD_ELEMENT)) - self.assertTrue(has_xsd_components(elem)) - - elem.clear() - self.assertFalse(has_xsd_components(elem)) - elem.append(etree_element(XSD_ANNOTATION)) - self.assertFalse(has_xsd_components(elem)) - elem.append(etree_element(XSD_ELEMENT)) - self.assertTrue(has_xsd_components(elem)) - self.assertFalse(has_xsd_components(elem, start=1)) - elem.append(etree_element(XSD_ANNOTATION)) - self.assertRaises(ValueError, list, iter_xsd_components(elem)) - - def test_get_xsd_component(self): - elem = etree_element(XSD_SCHEMA) - self.assertRaises(ValueError, get_xsd_component, elem) - self.assertIsNone(get_xsd_component(elem, required=False)) - elem.append(etree_element(XSD_ELEMENT)) - self.assertEqual(get_xsd_component(elem), elem[0]) - elem.append(etree_element(XSD_SIMPLE_TYPE)) - self.assertRaises(ValueError, get_xsd_component, elem) - self.assertEqual(get_xsd_component(elem, strict=False), elem[0]) - - elem.clear() - elem.append(etree_element(XSD_ANNOTATION)) - self.assertRaises(ValueError, get_xsd_component, elem) - self.assertIsNone(get_xsd_component(elem, required=False)) - elem.append(etree_element(XSD_SIMPLE_TYPE)) - self.assertEqual(get_xsd_component(elem), elem[1]) - elem.append(etree_element(XSD_ELEMENT)) - self.assertRaises(ValueError, get_xsd_component, elem) - self.assertEqual(get_xsd_component(elem, strict=False), elem[1]) - - elem.clear() - elem.append(etree_element(XSD_ANNOTATION)) - elem.append(etree_element(XSD_ANNOTATION)) - self.assertRaises(ValueError, get_xsd_component, elem, True, False) - - def test_get_xml_bool_attribute(self): - elem = etree_element(XSD_ELEMENT, attrib={'a1': 'true', 'a2': '1', 'a3': 'false', 'a4': '0', 'a5': 'x'}) - 
self.assertEqual(get_xml_bool_attribute(elem, 'a1'), True) - self.assertEqual(get_xml_bool_attribute(elem, 'a2'), True) - self.assertEqual(get_xml_bool_attribute(elem, 'a3'), False) - self.assertEqual(get_xml_bool_attribute(elem, 'a4'), False) - self.assertRaises(TypeError, get_xml_bool_attribute, elem, 'a5') - self.assertRaises(KeyError, get_xml_bool_attribute, elem, 'a6') - self.assertEqual(get_xml_bool_attribute(elem, 'a6', True), True) - self.assertEqual(get_xml_bool_attribute(elem, 'a6', 'true'), True) - self.assertEqual(get_xml_bool_attribute(elem, 'a6', '1'), True) - self.assertEqual(get_xml_bool_attribute(elem, 'a6', False), False) - self.assertEqual(get_xml_bool_attribute(elem, 'a6', 'false'), False) - self.assertEqual(get_xml_bool_attribute(elem, 'a6', '0'), False) - self.assertRaises(TypeError, get_xml_bool_attribute, elem, 'a6', 1) - self.assertRaises(TypeError, get_xml_bool_attribute, elem, 'a6', 0) - self.assertRaises(TypeError, get_xml_bool_attribute, elem, 'a6', 'True') - def test_get_xsd_derivation_attribute(self): elem = etree_element(XSD_ELEMENT, attrib={ 'a1': 'extension', 'a2': ' restriction', 'a3': '#all', 'a4': 'other', @@ -177,6 +113,97 @@ class TestHelpers(XMLSchemaTestCase): self.assertRaises(ValueError, get_xsd_derivation_attribute, elem, 'a6', values) self.assertEqual(get_xsd_derivation_attribute(elem, 'a7', values), '') + def test_parse_component(self): + component = XMLSchema.meta_schema.types['anyType'] + + elem = etree_element(XSD_SCHEMA) + self.assertIsNone(component._parse_child_component(elem)) + elem.append(etree_element(XSD_ELEMENT)) + self.assertEqual(component._parse_child_component(elem), elem[0]) + elem.append(etree_element(XSD_SIMPLE_TYPE)) + self.assertRaises(XMLSchemaParseError, component._parse_child_component, elem) + self.assertEqual(component._parse_child_component(elem, strict=False), elem[0]) + + elem.clear() + elem.append(etree_element(XSD_ANNOTATION)) + self.assertIsNone(component._parse_child_component(elem)) + 
elem.append(etree_element(XSD_SIMPLE_TYPE)) + self.assertEqual(component._parse_child_component(elem), elem[1]) + elem.append(etree_element(XSD_ELEMENT)) + self.assertRaises(XMLSchemaParseError, component._parse_child_component, elem) + self.assertEqual(component._parse_child_component(elem, strict=False), elem[1]) + + elem.clear() + elem.append(etree_element(XSD_ANNOTATION)) + elem.append(etree_element(XSD_ANNOTATION)) + self.assertIsNone(component._parse_child_component(elem, strict=False)) + elem.append(etree_element(XSD_SIMPLE_TYPE)) + self.assertEqual(component._parse_child_component(elem), elem[2]) + + def test_count_digits_function(self): + self.assertEqual(count_digits(10), (2, 0)) + self.assertEqual(count_digits(-10), (2, 0)) + + self.assertEqual(count_digits(081.2), (2, 1)) + self.assertEqual(count_digits(-081.200), (2, 1)) + self.assertEqual(count_digits(0.51), (0, 2)) + self.assertEqual(count_digits(-0.510), (0, 2)) + self.assertEqual(count_digits(-0.510), (0, 2)) + + self.assertEqual(count_digits(decimal.Decimal('100.0')), (3, 0)) + self.assertEqual(count_digits(decimal.Decimal('100.01')), (3, 2)) + self.assertEqual(count_digits('100.01'), (3, 2)) + + self.assertEqual(count_digits(decimal.Decimal('100.0E+4')), (7, 0)) + self.assertEqual(count_digits(decimal.Decimal('100.00001E+4')), (7, 1)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E4')), (7, 0)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E12')), (15, 0)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E19')), (22, 0)) + + self.assertEqual(count_digits(decimal.Decimal('100.0E-4')), (0, 2)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-4')), (0, 2)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-8')), (0, 6)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-9')), (0, 7)) + self.assertEqual(count_digits(decimal.Decimal('0100.00E-12')), (0, 10)) + self.assertEqual(count_digits(decimal.Decimal('100.10E-4')), (0, 5)) + 
self.assertEqual(count_digits(decimal.Decimal('0100.10E-12')), (0, 13)) + + +class TestElementTreeHelpers(unittest.TestCase): + + def test_prune_etree_function(self): + root = ElementTree.XML('') + self.assertFalse(prune_etree(root, lambda x: x.tag == 'C')) + self.assertListEqual([e.tag for e in root.iter()], ['A', 'B', 'D']) + self.assertEqual(root.attrib, {'id': '0'}) + + root = ElementTree.XML('') + self.assertTrue(prune_etree(root, lambda x: x.tag != 'C')) + self.assertListEqual([e.tag for e in root.iter()], ['A']) + self.assertEqual(root.attrib, {'id': '1'}) + + class SelectorClass: + tag = 'C' + + @classmethod + def class_method(cls, elem): + return elem.tag == cls.tag + + def method(self, elem): + return elem.tag != self.tag + + selector = SelectorClass() + + root = ElementTree.XML('') + self.assertFalse(prune_etree(root, selector.class_method)) + self.assertListEqual([e.tag for e in root.iter()], ['A', 'B', 'D']) + self.assertEqual(root.attrib, {'id': '0'}) + + root = ElementTree.XML('') + self.assertTrue(prune_etree(root, selector.method)) + self.assertListEqual([e.tag for e in root.iter()], ['A']) + self.assertEqual(root.attrib, {'id': '1'}) + if __name__ == '__main__': from xmlschema.tests import print_test_header diff --git a/xmlschema/tests/test_memory.py b/xmlschema/tests/test_memory.py new file mode 100644 index 0000000..69912cf --- /dev/null +++ b/xmlschema/tests/test_memory.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2018-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +import unittest +import os +import decimal +import subprocess + + +class TestMemoryUsage(unittest.TestCase): + + @staticmethod + def check_memory_profile(output): + """Check the output of a memory memory profile run on a function.""" + mem_usage = [] + func_num = 0 + for line in output.split('\n'): + parts = line.split() + if 'def' in parts: + func_num += 1 + if not parts or not parts[0].isdigit() or len(parts) == 1 \ + or not parts[1].replace('.', '').isdigit(): + continue + mem_usage.append(decimal.Decimal(parts[1])) + + if func_num > 1: + raise ValueError("Cannot the a memory profile output of more than one function!") + return max(v - mem_usage[0] for v in mem_usage[1:]) + + @unittest.skip + def test_package_memory_usage(self): + test_dir = os.path.dirname(__file__) or '.' + cmd = [os.path.join(test_dir, 'check_memory.py'), '1'] + output = subprocess.check_output(cmd, universal_newlines=True) + package_mem = self.check_memory_profile(output) + self.assertLess(package_mem, 20) + + def test_element_tree_memory_usage(self): + test_dir = os.path.dirname(__file__) or '.' 
+ xsd10_schema_file = os.path.join( + os.path.dirname(os.path.abspath(test_dir)), 'validators/schemas/XSD_1.0/XMLSchema.xsd' + ) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '2', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + parse_mem = self.check_memory_profile(output) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '3', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + iterparse_mem = self.check_memory_profile(output) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '4', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + lazy_iterparse_mem = self.check_memory_profile(output) + + self.assertLess(parse_mem, 2) + self.assertLessEqual(lazy_iterparse_mem, parse_mem / 2) + self.assertLessEqual(lazy_iterparse_mem, iterparse_mem) + + def test_decode_memory_usage(self): + test_dir = os.path.dirname(__file__) or '.' + xsd10_schema_file = os.path.join( + os.path.dirname(os.path.abspath(test_dir)), 'validators/schemas/XSD_1.0/XMLSchema.xsd' + ) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '5', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + decode_mem = self.check_memory_profile(output) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '6', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + lazy_decode_mem = self.check_memory_profile(output) + + self.assertLess(decode_mem, 2.6) + self.assertLessEqual(lazy_decode_mem, decode_mem / decimal.Decimal('1.1')) + + def test_validate_memory_usage(self): + test_dir = os.path.dirname(__file__) or '.' 
+ xsd10_schema_file = os.path.join( + os.path.dirname(os.path.abspath(test_dir)), 'validators/schemas/XSD_1.0/XMLSchema.xsd' + ) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '7', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + validate_mem = self.check_memory_profile(output) + + cmd = [os.path.join(test_dir, 'check_memory.py'), '8', xsd10_schema_file] + output = subprocess.check_output(cmd, universal_newlines=True) + lazy_validate_mem = self.check_memory_profile(output) + + self.assertLess(validate_mem, 2.6) + self.assertLessEqual(lazy_validate_mem, validate_mem / decimal.Decimal('1.1')) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/test_meta.py b/xmlschema/tests/test_meta.py index 046dcd2..e4d9fd3 100644 --- a/xmlschema/tests/test_meta.py +++ b/xmlschema/tests/test_meta.py @@ -14,19 +14,20 @@ This module runs tests on XSD meta schema and builtins of the 'xmlschema' packag """ import unittest -import xmlschema -from xmlschema import XMLSchemaDecodeError, XMLSchemaEncodeError, XMLSchemaValidationError +from xmlschema import XMLSchemaDecodeError, XMLSchemaEncodeError, XMLSchemaValidationError, \ + XMLSchema10, XMLSchema11 from xmlschema.validators.builtins import HEX_BINARY_PATTERN, NOT_BASE64_BINARY_PATTERN -xsd_10_meta_schema = xmlschema.XMLSchema.meta_schema -xsd_11_meta_schema = xmlschema.validators.XMLSchema11.meta_schema - class TestXsd10BuiltinTypes(unittest.TestCase): @classmethod def setUpClass(cls): - cls.meta_schema = xsd_10_meta_schema + cls.types = XMLSchema10.builtin_types() + + @classmethod + def tearDownClass(cls): + XMLSchema10.meta_schema.clear() def test_hex_binary_pattern(self): self.assertEqual(HEX_BINARY_PATTERN.search("aff1c").group(0), 'aff1c') @@ -37,52 +38,51 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertEqual(NOT_BASE64_BINARY_PATTERN.search("YWVpb3U!=").group(0), '!') def 
test_boolean_decode(self): - xsd_type = self.meta_schema.types['boolean'] - self.assertTrue(xsd_type.decode(' true \n') is True) - self.assertTrue(xsd_type.decode(' 0 \n') is False) - self.assertTrue(xsd_type.decode(' 1 \n') is True) - self.assertTrue(xsd_type.decode(' false \n') is False) - self.assertRaises(XMLSchemaDecodeError, xsd_type.decode, ' 1.0 ') - self.assertRaises(XMLSchemaDecodeError, xsd_type.decode, ' alpha \n') + boolean_type = self.types['boolean'] + self.assertTrue(boolean_type.decode(' true \n') is True) + self.assertTrue(boolean_type.decode(' 0 \n') is False) + self.assertTrue(boolean_type.decode(' 1 \n') is True) + self.assertTrue(boolean_type.decode(' false \n') is False) + self.assertRaises(XMLSchemaDecodeError, boolean_type.decode, ' 1.0 ') + self.assertRaises(XMLSchemaDecodeError, boolean_type.decode, ' alpha \n') def test_boolean_encode(self): - xsd_type = self.meta_schema.types['boolean'] - self.assertTrue(xsd_type.encode(True) == 'true') - self.assertTrue(xsd_type.encode(False) == 'false') - self.assertRaises(XMLSchemaEncodeError, xsd_type.encode, 1) - self.assertRaises(XMLSchemaEncodeError, xsd_type.encode, 0) - self.assertRaises(XMLSchemaEncodeError, xsd_type.encode, 10) - self.assertRaises(XMLSchemaEncodeError, xsd_type.encode, 'alpha') + boolean_type = self.types['boolean'] + self.assertTrue(boolean_type.encode(True) == 'true') + self.assertTrue(boolean_type.encode(False) == 'false') + self.assertRaises(XMLSchemaEncodeError, boolean_type.encode, 1) + self.assertRaises(XMLSchemaEncodeError, boolean_type.encode, 0) + self.assertRaises(XMLSchemaEncodeError, boolean_type.encode, 10) + self.assertRaises(XMLSchemaEncodeError, boolean_type.encode, 'alpha') def test_integer_decode(self): - xsd_types = self.meta_schema.types - self.assertTrue(xsd_types['integer'].decode(' 1000 \n') == 1000) - self.assertTrue(xsd_types['integer'].decode(' -19 \n') == -19) - self.assertTrue(xsd_types['integer'].decode(' 0\n') == 0) - 
self.assertRaises(XMLSchemaDecodeError, xsd_types['integer'].decode, ' 1000.0 \n') - self.assertRaises(XMLSchemaDecodeError, xsd_types['integer'].decode, ' alpha \n') - self.assertRaises(XMLSchemaValidationError, xsd_types['byte'].decode, ' 257 \n') - self.assertRaises(XMLSchemaValidationError, xsd_types['unsignedInt'].decode, ' -1') + integer_type = self.types['integer'] + self.assertTrue(integer_type.decode(' 1000 \n') == 1000) + self.assertTrue(integer_type.decode(' -19 \n') == -19) + self.assertTrue(integer_type.decode(' 0\n') == 0) + self.assertRaises(XMLSchemaDecodeError, integer_type.decode, ' 1000.0 \n') + self.assertRaises(XMLSchemaDecodeError, integer_type.decode, ' alpha \n') + self.assertRaises(XMLSchemaValidationError, self.types['byte'].decode, ' 257 \n') + self.assertRaises(XMLSchemaValidationError, self.types['unsignedInt'].decode, ' -1') def test_integer_encode(self): - xsd_types = self.meta_schema.types - self.assertTrue(xsd_types['integer'].encode(1000) == '1000') - self.assertTrue(xsd_types['integer'].encode(-19) == '-19') - self.assertTrue(xsd_types['integer'].encode(0) == '0') - self.assertRaises(XMLSchemaEncodeError, xsd_types['integer'].encode, 10.1) - self.assertRaises(XMLSchemaEncodeError, xsd_types['integer'].encode, 'alpha') - self.assertRaises(XMLSchemaValidationError, xsd_types['unsignedInt'].decode, ' -1') + integer_type = self.types['integer'] + self.assertTrue(integer_type.encode(1000) == '1000') + self.assertTrue(integer_type.encode(-19) == '-19') + self.assertTrue(integer_type.encode(0) == '0') + self.assertRaises(XMLSchemaEncodeError, integer_type.encode, 10.1) + self.assertRaises(XMLSchemaEncodeError, integer_type.encode, 'alpha') + self.assertRaises(XMLSchemaValidationError, self.types['unsignedInt'].decode, ' -1') def test_float_decode(self): - xsd_types = self.meta_schema.types - self.assertTrue(xsd_types['float'].decode(' 1000.1 \n') == 1000.10) - self.assertTrue(xsd_types['float'].decode(' -19 \n') == -19.0) - 
self.assertTrue(xsd_types['double'].decode(' 0.0001\n') == 0.0001) - self.assertRaises(XMLSchemaDecodeError, xsd_types['float'].decode, ' true ') - self.assertRaises(XMLSchemaDecodeError, xsd_types['double'].decode, ' alpha \n') + self.assertTrue(self.types['float'].decode(' 1000.1 \n') == 1000.10) + self.assertTrue(self.types['float'].decode(' -19 \n') == -19.0) + self.assertTrue(self.types['double'].decode(' 0.0001\n') == 0.0001) + self.assertRaises(XMLSchemaDecodeError, self.types['float'].decode, ' true ') + self.assertRaises(XMLSchemaDecodeError, self.types['double'].decode, ' alpha \n') def test_float_encode(self): - float_type = self.meta_schema.types['float'] + float_type = self.types['float'] self.assertTrue(float_type.encode(1000.0) == '1000.0') self.assertTrue(float_type.encode(-19.0) == '-19.0') self.assertTrue(float_type.encode(0.0) == '0.0') @@ -90,7 +90,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertRaises(XMLSchemaEncodeError, float_type.encode, 'alpha') def test_time_type(self): - time_type = self.meta_schema.types['time'] + time_type = self.types['time'] self.assertTrue(time_type.is_valid('14:35:00')) self.assertTrue(time_type.is_valid('14:35:20.5345')) self.assertTrue(time_type.is_valid('14:35:00-01:00')) @@ -103,7 +103,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(time_type.is_valid('14:35.5:00')) def test_datetime_type(self): - datetime_type = self.meta_schema.types['dateTime'] + datetime_type = self.types['dateTime'] self.assertTrue(datetime_type.is_valid('2007-05-10T14:35:00')) self.assertTrue(datetime_type.is_valid('2007-05-10T14:35:20.6')) self.assertTrue(datetime_type.is_valid('2007-05-10T14:35:00-03:00')) @@ -118,16 +118,12 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertTrue(datetime_type.is_valid('2018-10-10T13:57:53.0702116-04:00')) def test_date_type(self): - date_type = self.meta_schema.types['date'] + date_type = self.types['date'] 
self.assertTrue(date_type.is_valid('2012-05-31')) self.assertTrue(date_type.is_valid('-0065-10-15')) self.assertTrue(date_type.is_valid('12012-05-31')) self.assertTrue(date_type.is_valid('2012-05-31-05:00')) self.assertTrue(date_type.is_valid('2015-06-30Z')) - if self.meta_schema.XSD_VERSION > '1.0': - self.assertTrue(date_type.is_valid('0000-01-01')) - else: - self.assertFalse(date_type.is_valid('0000-01-01')) self.assertFalse(date_type.is_valid('12-05-31')) self.assertFalse(date_type.is_valid('2012-5-31')) self.assertFalse(date_type.is_valid('31-05-2012')) @@ -135,8 +131,11 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(date_type.is_valid('+2012-05-31')) self.assertFalse(date_type.is_valid('')) + def test_year_zero(self): + self.assertFalse(self.types['date'].is_valid('0000-01-01')) + def test_g_year_type(self): - g_year_type = self.meta_schema.types['gYear'] + g_year_type = self.types['gYear'] self.assertTrue(g_year_type.is_valid('2007')) self.assertTrue(g_year_type.is_valid('2013-01:00')) self.assertTrue(g_year_type.is_valid('102013-01:00')) @@ -149,7 +148,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(g_year_type.is_valid('')) def test_g_year_month_type(self): - g_year_month_type = self.meta_schema.types['gYearMonth'] + g_year_month_type = self.types['gYearMonth'] self.assertTrue(g_year_month_type.is_valid('2010-07')) self.assertTrue(g_year_month_type.is_valid('2020-01-05:00')) self.assertFalse(g_year_month_type.is_valid('99-02')) @@ -159,7 +158,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(g_year_month_type.is_valid('')) def test_g_month_type(self): - g_month_type = self.meta_schema.types['gMonth'] + g_month_type = self.types['gMonth'] self.assertTrue(g_month_type.is_valid('--08')) self.assertTrue(g_month_type.is_valid('--05-03:00')) self.assertFalse(g_month_type.is_valid('03')) @@ -169,7 +168,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(g_month_type.is_valid('')) def 
test_g_month_day_type(self): - g_month_day_type = self.meta_schema.types['gMonthDay'] + g_month_day_type = self.types['gMonthDay'] self.assertTrue(g_month_day_type.is_valid('--12-24')) self.assertTrue(g_month_day_type.is_valid('--04-25Z')) self.assertFalse(g_month_day_type.is_valid('12-24')) @@ -179,7 +178,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(g_month_day_type.is_valid('')) def test_g_day_type(self): - g_day_type = self.meta_schema.types['gDay'] + g_day_type = self.types['gDay'] self.assertTrue(g_day_type.is_valid('---19')) self.assertTrue(g_day_type.is_valid('---07')) self.assertFalse(g_day_type.is_valid('---32')) @@ -189,7 +188,7 @@ class TestXsd10BuiltinTypes(unittest.TestCase): self.assertFalse(g_day_type.is_valid('')) def test_duration_type(self): - duration_type = self.meta_schema.types['duration'] + duration_type = self.types['duration'] self.assertTrue(duration_type.is_valid('-P809YT3H5M5S')) self.assertTrue(duration_type.is_valid('P5Y7M20DT3H5M5S')) self.assertTrue(duration_type.is_valid('P1DT6H')) @@ -216,10 +215,17 @@ class TestXsd11BuiltinTypes(TestXsd10BuiltinTypes): @classmethod def setUpClass(cls): - cls.meta_schema = xsd_11_meta_schema + cls.types = XMLSchema11.builtin_types() + + @classmethod + def tearDownClass(cls): + XMLSchema11.meta_schema.clear() + + def test_year_zero(self): + self.assertTrue(self.types['date'].is_valid('0000-01-01')) def test_date_time_stamp(self): - date_time_stamp_type = self.meta_schema.types['dateTimeStamp'] + date_time_stamp_type = self.types['dateTimeStamp'] self.assertTrue(date_time_stamp_type.is_valid('2003-10-20T16:50:08-03:00')) self.assertTrue(date_time_stamp_type.is_valid('2003-10-20T16:50:08Z')) self.assertFalse(date_time_stamp_type.is_valid('2003-10-20T16:50:08')) @@ -227,7 +233,7 @@ class TestXsd11BuiltinTypes(TestXsd10BuiltinTypes): self.assertFalse(date_time_stamp_type.is_valid('')) def test_day_time_duration_type(self): - day_time_duration_type = 
self.meta_schema.types['dayTimeDuration'] + day_time_duration_type = self.types['dayTimeDuration'] self.assertTrue(day_time_duration_type.is_valid('P7DT15H40M0S')) self.assertTrue(day_time_duration_type.is_valid('-P10D')) self.assertTrue(day_time_duration_type.is_valid('P0D')) @@ -245,7 +251,7 @@ class TestXsd11BuiltinTypes(TestXsd10BuiltinTypes): self.assertFalse(day_time_duration_type.is_valid('')) def test_year_month_duration_type(self): - year_month_duration_type = self.meta_schema.types['yearMonthDuration'] + year_month_duration_type = self.types['yearMonthDuration'] self.assertTrue(year_month_duration_type.is_valid('P3Y4M')) self.assertTrue(year_month_duration_type.is_valid('P15M')) self.assertTrue(year_month_duration_type.is_valid('P0Y')) @@ -263,63 +269,77 @@ class TestXsd11BuiltinTypes(TestXsd10BuiltinTypes): class TestGlobalMaps(unittest.TestCase): + @classmethod + def setUpClass(cls): + XMLSchema10.meta_schema.build() + XMLSchema11.meta_schema.build() + + @classmethod + def tearDownClass(cls): + XMLSchema10.meta_schema.clear() + XMLSchema11.meta_schema.clear() + def test_xsd_10_globals(self): - self.assertEqual(len(xsd_10_meta_schema.maps.notations), 2) - self.assertEqual(len(xsd_10_meta_schema.maps.types), 108) - self.assertEqual(len(xsd_10_meta_schema.maps.attributes), 18) - self.assertEqual(len(xsd_10_meta_schema.maps.attribute_groups), 9) - self.assertEqual(len(xsd_10_meta_schema.maps.groups), 18) - self.assertEqual(len(xsd_10_meta_schema.maps.elements), 45) - self.assertEqual(len([e.is_global for e in xsd_10_meta_schema.maps.iter_globals()]), 200) - self.assertEqual(len(xsd_10_meta_schema.maps.substitution_groups), 0) + self.assertEqual(len(XMLSchema10.meta_schema.maps.notations), 2) + self.assertEqual(len(XMLSchema10.meta_schema.maps.types), 92) + self.assertEqual(len(XMLSchema10.meta_schema.maps.attributes), 8) + self.assertEqual(len(XMLSchema10.meta_schema.maps.attribute_groups), 3) + self.assertEqual(len(XMLSchema10.meta_schema.maps.groups), 12) 
+ self.assertEqual(len(XMLSchema10.meta_schema.maps.elements), 41) + self.assertEqual(len([e.is_global() for e in XMLSchema10.meta_schema.maps.iter_globals()]), 158) + self.assertEqual(len(XMLSchema10.meta_schema.maps.substitution_groups), 0) def test_xsd_11_globals(self): - self.assertEqual(len(xsd_11_meta_schema.maps.notations), 2) - self.assertEqual(len(xsd_11_meta_schema.maps.types), 118) - self.assertEqual(len(xsd_11_meta_schema.maps.attributes), 18) - self.assertEqual(len(xsd_11_meta_schema.maps.attribute_groups), 10) - self.assertEqual(len(xsd_11_meta_schema.maps.groups), 19) - self.assertEqual(len(xsd_11_meta_schema.maps.elements), 51) - self.assertEqual(len([e.is_global for e in xsd_11_meta_schema.maps.iter_globals()]), 218) - self.assertEqual(len(xsd_11_meta_schema.maps.substitution_groups), 1) + self.assertEqual(len(XMLSchema11.meta_schema.maps.notations), 2) + self.assertEqual(len(XMLSchema11.meta_schema.maps.types), 103) + self.assertEqual(len(XMLSchema11.meta_schema.maps.attributes), 14) + self.assertEqual(len(XMLSchema11.meta_schema.maps.attribute_groups), 4) + self.assertEqual(len(XMLSchema11.meta_schema.maps.groups), 13) + self.assertEqual(len(XMLSchema11.meta_schema.maps.elements), 47) + self.assertEqual(len([e.is_global() for e in XMLSchema11.meta_schema.maps.iter_globals()]), 183) + self.assertEqual(len(XMLSchema11.meta_schema.maps.substitution_groups), 1) def test_xsd_10_build(self): - xsd_10_meta_schema.maps.build() - self.assertEqual(len([e for e in xsd_10_meta_schema.maps.iter_globals()]), 200) - self.assertTrue(xsd_10_meta_schema.maps.built) - xsd_10_meta_schema.maps.clear() - xsd_10_meta_schema.maps.build() - self.assertTrue(xsd_10_meta_schema.maps.built) + self.assertEqual(len([e for e in XMLSchema10.meta_schema.maps.iter_globals()]), 158) + self.assertTrue(XMLSchema10.meta_schema.maps.built) + XMLSchema10.meta_schema.maps.clear() + XMLSchema10.meta_schema.maps.build() + self.assertTrue(XMLSchema10.meta_schema.maps.built) def 
test_xsd_11_build(self): - xsd_11_meta_schema.maps.build() - self.assertEqual(len([e for e in xsd_11_meta_schema.maps.iter_globals()]), 218) - self.assertTrue(xsd_11_meta_schema.maps.built) - xsd_11_meta_schema.maps.clear() - xsd_11_meta_schema.maps.build() - self.assertTrue(xsd_11_meta_schema.maps.built) + self.assertEqual(len([e for e in XMLSchema11.meta_schema.maps.iter_globals()]), 183) + self.assertTrue(XMLSchema11.meta_schema.maps.built) + XMLSchema11.meta_schema.maps.clear() + XMLSchema11.meta_schema.maps.build() + self.assertTrue(XMLSchema11.meta_schema.maps.built) def test_xsd_10_components(self): total_counter = 0 global_counter = 0 - for g in xsd_10_meta_schema.maps.iter_globals(): + for g in XMLSchema10.meta_schema.maps.iter_globals(): for c in g.iter_components(): total_counter += 1 - if c.is_global: + if c.is_global(): global_counter += 1 - self.assertEqual(global_counter, 200) - self.assertEqual(total_counter, 901) + self.assertEqual(global_counter, 158) + self.assertEqual(total_counter, 782) def test_xsd_11_components(self): total_counter = 0 global_counter = 0 - for g in xsd_11_meta_schema.maps.iter_globals(): + for g in XMLSchema11.meta_schema.maps.iter_globals(): for c in g.iter_components(): total_counter += 1 - if c.is_global: + if c.is_global(): global_counter += 1 - self.assertEqual(global_counter, 218) - self.assertEqual(total_counter, 1018) + self.assertEqual(global_counter, 183) + self.assertEqual(total_counter, 932) + + def test_xsd_11_restrictions(self): + all_model_type = XMLSchema11.meta_schema.types['all'] + self.assertTrue( + all_model_type.content_type.is_restriction(all_model_type.base_type.content_type) + ) if __name__ == '__main__': diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py index 901ea3a..3748ead 100644 --- a/xmlschema/tests/test_models.py +++ b/xmlschema/tests/test_models.py @@ -14,11 +14,13 @@ This module runs tests concerning model groups validation. 
""" import unittest +from xmlschema import XMLSchema10, XMLSchema11 from xmlschema.validators import ModelVisitor -from xmlschema.tests import XMLSchemaTestCase +from xmlschema.compat import ordered_dict_class +from xmlschema.tests import casepath, XsdValidatorTestCase -class TestModelValidation(XMLSchemaTestCase): +class TestModelValidation(XsdValidatorTestCase): # --- Test helper functions --- @@ -149,9 +151,9 @@ class TestModelValidation(XMLSchemaTestCase): self.check_stop(model) # is optional self.assertIsNone(model.element) - # --- XSD 1.0 schema --- + # --- XSD 1.0/1.1 meta-schema models --- - def test_simple_derivation_model(self): + def test_meta_simple_derivation_model(self): """ @@ -161,7 +163,7 @@ class TestModelValidation(XMLSchemaTestCase): """ - group = self.schema_class.meta_schema.groups['simpleDerivation'] + group = XMLSchema10.meta_schema.groups['simpleDerivation'] model = ModelVisitor(group) self.check_advance_true(model) # match @@ -184,8 +186,9 @@ class TestModelValidation(XMLSchemaTestCase): self.check_advance_false(model, [(group, 0, group[:])]) # not match with self.assertIsNone(model.element) - def test_simple_restriction_model(self): + def test_meta_simple_restriction_model(self): """ + @@ -209,25 +212,38 @@ class TestModelValidation(XMLSchemaTestCase): + + + + + + + + + + + """ # Sequence with an optional single element and an optional unlimited choice. 
group = self.schema_class.meta_schema.groups['simpleRestrictionModel'] model = ModelVisitor(group) - self.assertEqual(model.element, group[0]) - self.check_advance_true(model) # match - self.assertEqual(model.element, group[1][0][0]) - self.check_advance_false(model) # do not match - self.assertEqual(model.element, group[1][0][1]) - self.check_advance_false(model) # do not match - self.assertEqual(model.element, group[1][0][2]) - self.check_advance_true(model) # match - self.assertEqual(model.element, group[1][0][0]) - for _ in range(12): - self.check_advance_false(model) # no match for all the inner choice group "xs:facets" - self.assertIsNone(model.element) - def test_schema_model(self): + if self.schema_class.XSD_VERSION == '1.0': + self.assertEqual(model.element, group[0]) + self.check_advance_true(model) # match + self.assertEqual(model.element, group[1][0][0]) + self.check_advance_false(model) # do not match + self.assertEqual(model.element, group[1][0][1]) + self.check_advance_false(model) # do not match + self.assertEqual(model.element, group[1][0][2]) + self.check_advance_true(model) # match + self.assertEqual(model.element, group[1][0][0]) + for _ in range(12): + self.check_advance_false(model) # no match for all the inner choice group "xs:facets" + self.assertIsNone(model.element) + + def test_meta_schema_top_model(self): """ @@ -287,7 +303,7 @@ class TestModelValidation(XMLSchemaTestCase): self.check_advance_true(model) # match self.assertIsNone(model.element) - def test_attr_declaration(self): + def test_meta_attr_declarations_group(self): """ @@ -321,7 +337,7 @@ class TestModelValidation(XMLSchemaTestCase): self.check_advance(model, match) self.assertEqual(model.element, group[1]) - def test_complex_type_model(self): + def test_meta_complex_type_model(self): """ @@ -342,6 +358,20 @@ class TestModelValidation(XMLSchemaTestCase): + + + + + + + + + + + + + + """ group = self.schema_class.meta_schema.groups['complexTypeModel'] @@ -356,27 +386,31 @@ class 
TestModelValidation(XMLSchemaTestCase): self.check_advance_true(model) # match self.assertIsNone(model.element) - model.restart() - self.assertEqual(model.element, group[0]) - for match in [False, False, False, False, True]: - self.check_advance(model, match) # match - self.check_stop(model) - self.assertIsNone(model.element) + if self.schema_class.XSD_VERSION == '1.0': + model.restart() + self.assertEqual(model.element, group[0]) + for match in [False, False, False, False, True]: + self.check_advance(model, match) # match + self.check_stop(model) + self.assertIsNone(model.element) - model.restart() - self.assertEqual(model.element, group[0]) - for match in [False, False, False, False, True, False, True, False, False, False]: - self.check_advance(model, match) # match, match - self.assertIsNone(model.element) + model.restart() + self.assertEqual(model.element, group[0]) + for match in [False, False, False, False, True, False, True, False, False, False]: + self.check_advance(model, match) # match, match + self.assertIsNone(model.element) - def test_schema_document_model(self): + def test_meta_schema_document_model(self): group = self.schema_class.meta_schema.elements['schema'].type.content_type # A schema model with a wrong tag model = ModelVisitor(group) - self.assertEqual(model.element, group[0][0]) - self.check_advance_false(model) # eg. anyAttribute - self.check_stop(model) + if self.schema_class.XSD_VERSION == '1.0': + self.assertEqual(model.element, group[0][0]) + self.check_advance_false(model) # eg. 
anyAttribute + self.check_stop(model) + else: + self.assertEqual(model.element, group[0][0][0]) # # Tests on schema test_cases/features/models/models.xsd @@ -465,10 +499,25 @@ class TestModelValidation(XMLSchemaTestCase): self.assertEqual(model.element, group[0][0]) self.check_stop(model) + def test_model_group8(self): + group = self.models_schema.groups['group8'] + + model = ModelVisitor(group) + self.assertEqual(model.element, group[0][0]) + self.check_advance_true(model) # match choice with + self.check_advance_false(model) + self.assertEqual(model.element, group[0][1]) + self.check_advance_true(model) # match choice with + self.assertEqual(model.element, group[0][2]) + self.check_advance_true(model) # match choice with + self.assertEqual(model.element, group[0][3]) + self.check_advance_true(model) # match choice with + self.assertIsNone(model.element) + # # Tests on issues def test_issue_086(self): - issue_086_xsd = self.casepath('issues/issue_086/issue_086.xsd') + issue_086_xsd = casepath('issues/issue_086/issue_086.xsd') schema = self.schema_class(issue_086_xsd) group = schema.types['Foo'].content_type @@ -524,6 +573,231 @@ class TestModelValidation(XMLSchemaTestCase): self.check_stop(model) +class TestModelValidation11(TestModelValidation): + schema_class = XMLSchema11 + + +class TestModelBasedSorting(XsdValidatorTestCase): + + def test_sort_content(self): + # test of ModelVisitor's sort_content/iter_unordered_content + schema = self.get_schema(""" + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + self.assertListEqual( + model.sort_content([('B2', 10), ('B1', 'abc'), ('B3', True)]), + [('B1', 'abc'), ('B2', 10), ('B3', True)] + ) + self.assertListEqual( + model.sort_content([('B3', True), ('B2', 10), ('B1', 'abc')]), + [('B1', 'abc'), ('B2', 10), ('B3', True)] + ) + self.assertListEqual( + model.sort_content([('B2', 10), ('B4', None), ('B1', 'abc'), ('B3', True)]), + [('B1', 'abc'), ('B2', 10), ('B3', True), ('B4', 
None)] + ) + content = [('B2', 10), ('B4', None), ('B1', 'abc'), (1, 'hello'), ('B3', True)] + self.assertListEqual( + model.sort_content(content), + [(1, 'hello'), ('B1', 'abc'), ('B2', 10), ('B3', True), ('B4', None)] + ) + content = [(2, 'world!'), ('B2', 10), ('B4', None), ('B1', 'abc'), (1, 'hello'), ('B3', True)] + self.assertListEqual( + model.sort_content(content), + [(1, 'hello'), ('B1', 'abc'), (2, 'world!'), ('B2', 10), ('B3', True), ('B4', None)] + ) + + # With a dict-type argument + content = ordered_dict_class([('B2', [10]), ('B1', ['abc']), ('B3', [True])]) + self.assertListEqual( + model.sort_content(content), [('B1', 'abc'), ('B2', 10), ('B3', True)] + ) + content = ordered_dict_class([('B2', [10]), ('B1', ['abc']), ('B3', [True]), (1, 'hello')]) + self.assertListEqual( + model.sort_content(content), [(1, 'hello'), ('B1', 'abc'), ('B2', 10), ('B3', True)] + ) + + # With partial content + self.assertListEqual(model.sort_content([]), []) + self.assertListEqual(model.sort_content([('B1', 'abc')]), [('B1', 'abc')]) + self.assertListEqual(model.sort_content([('B2', 10)]), [('B2', 10)]) + self.assertListEqual(model.sort_content([('B3', True)]), [('B3', True)]) + self.assertListEqual( + model.sort_content([('B3', True), ('B1', 'abc')]), [('B1', 'abc'), ('B3', True)] + ) + self.assertListEqual( + model.sort_content([('B2', 10), ('B1', 'abc')]), [('B1', 'abc'), ('B2', 10)] + ) + self.assertListEqual( + model.sort_content([('B3', True), ('B2', 10)]), [('B2', 10), ('B3', True)] + ) + + def test_iter_collapsed_content_with_optional_elements(self): + schema = self.get_schema(""" + + + + + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [('B3', 10), ('B4', None), ('B5', True), ('B6', 'alpha'), ('B7', 20)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + content = [('B3', 10), ('B5', True), ('B6', 'alpha'), ('B7', 20)] # Missing B4 + model.restart() + 
self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + def test_iter_collapsed_content_with_repeated_elements(self): + schema = self.get_schema(""" + + + + + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [ + ('B3', 10), ('B4', None), ('B5', True), ('B5', False), ('B6', 'alpha'), ('B7', 20) + ] + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + content = [('B3', 10), ('B3', 11), ('B3', 12), ('B4', None), ('B5', True), + ('B5', False), ('B6', 'alpha'), ('B7', 20), ('B7', 30)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + content = [('B3', 10), ('B3', 11), ('B3', 12), ('B4', None), ('B5', True), ('B5', False)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), content + ) + + def test_iter_collapsed_content_with_repeated_groups(self): + schema = self.get_schema(""" + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4)] + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 1), ('B2', 3), ('B1', 2), ('B2', 4)] + ) + + # Model broken by unknown element at start + content = [('X', None), ('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 1), ('X', None), ('B1', 2), ('B2', 3), ('B2', 4)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 1), ('B1', 2), ('X', None), ('B2', 3), ('B2', 4)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 1), ('B1', 2), ('B2', 3), ('X', None), ('B2', 4)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 1), ('B2', 3), ('B1', 2), ('X', 
None), ('B2', 4)] + ) + + content = [('B1', 1), ('B1', 2), ('B2', 3), ('B2', 4), ('X', None)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 1), ('B2', 3), ('B1', 2), ('B2', 4), ('X', None)] + ) + + def test_iter_collapsed_content_with_single_elements(self): + schema = self.get_schema(""" + + + + + + + + + """) + + model = ModelVisitor(schema.types['A_type'].content_type) + + content = [('B1', 'abc'), ('B2', 10), ('B3', False)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B3', False), ('B1', 'abc'), ('B2', 10)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 'abc'), ('B3', False), ('B2', 10)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('B1', 'abc'), ('B1', 'def'), ('B2', 10), ('B3', False)] + model.restart() + self.assertListEqual( + list(model.iter_collapsed_content(content)), + [('B1', 'abc'), ('B2', 10), ('B3', False), ('B1', 'def')] + ) + + content = [('B1', 'abc'), ('B2', 10), ('X', None)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + content = [('X', None), ('B1', 'abc'), ('B2', 10), ('B3', False)] + model.restart() + self.assertListEqual(list(model.iter_collapsed_content(content)), content) + + if __name__ == '__main__': from xmlschema.tests import print_test_header diff --git a/xmlschema/tests/test_package.py b/xmlschema/tests/test_package.py index da8958c..b4a03c2 100644 --- a/xmlschema/tests/test_package.py +++ b/xmlschema/tests/test_package.py @@ -17,177 +17,8 @@ import os import re import importlib import platform -import sys -import decimal -import subprocess - -try: - import memory_profiler -except ImportError: - memory_profiler = None -@unittest.skipIf(sys.version_info < (3,), "In Python 2 ElementTree is not overwritten by cElementTree") -class 
TestElementTree(unittest.TestCase): - - def test_element_string_serialization(self): - ElementTree = importlib.import_module('xml.etree.ElementTree') - xmlschema_etree = importlib.import_module('xmlschema.etree') - - elem = ElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - elem = xmlschema_etree.ElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - elem = xmlschema_etree.PyElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - - def test_import_element_tree_before(self): - ElementTree = importlib.import_module('xml.etree.ElementTree') - xmlschema_etree = importlib.import_module('xmlschema.etree') - - self.assertIsNot(ElementTree.Element, ElementTree._Element_Py, msg="cElementTree not available!") - elem = xmlschema_etree.PyElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree) - self.assertIs(xmlschema_etree.ElementTree, ElementTree) - - def test_import_element_tree_after(self): - xmlschema_etree = importlib.import_module('xmlschema.etree') - ElementTree = importlib.import_module('xml.etree.ElementTree') - - self.assertIsNot(ElementTree.Element, ElementTree._Element_Py, msg="cElementTree not available!") - elem = xmlschema_etree.PyElementTree.Element('element') - self.assertEqual(xmlschema_etree.etree_tostring(elem), '') - self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree) - self.assertIs(xmlschema_etree.ElementTree, ElementTree) - - def test_element_tree_import_script(self): - test_dir = os.path.dirname(__file__) or '.' 
- - cmd = [os.path.join(test_dir, 'check_etree_import.py')] - process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output = process.stdout.decode('utf-8') - self.assertTrue("\nTest OK:" in output, msg="Wrong import of ElementTree after xmlschema") - - cmd.append('--before') - process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output = process.stdout.decode('utf-8') - self.assertTrue("\nTest OK:" in output, msg="Wrong import of ElementTree before xmlschema") - - def test_safe_xml_parser(self): - test_dir = os.path.dirname(__file__) or '.' - xmlschema_etree = importlib.import_module('xmlschema.etree') - parser = xmlschema_etree.SafeXMLParser(target=xmlschema_etree.PyElementTree.TreeBuilder()) - PyElementTree = xmlschema_etree.PyElementTree - - xml_file = os.path.join(test_dir, 'test_cases/resources/with_entity.xml') - elem = xmlschema_etree.ElementTree.parse(xml_file).getroot() - self.assertEqual(elem.text, 'abc') - self.assertRaises( - PyElementTree.ParseError, xmlschema_etree.ElementTree.parse, xml_file, parser=parser - ) - - xml_file = os.path.join(test_dir, 'test_cases/resources/unused_external_entity.xml') - elem = xmlschema_etree.ElementTree.parse(xml_file).getroot() - self.assertEqual(elem.text, 'abc') - self.assertRaises( - PyElementTree.ParseError, xmlschema_etree.ElementTree.parse, xml_file, parser=parser - ) - - xml_file = os.path.join(test_dir, 'test_cases/resources/external_entity.xml') - self.assertRaises(xmlschema_etree.ParseError, xmlschema_etree.ElementTree.parse, xml_file) - self.assertRaises( - PyElementTree.ParseError, xmlschema_etree.ElementTree.parse, xml_file, parser=parser - ) - - -@unittest.skipIf(memory_profiler is None or sys.version_info[:2] != (3, 7), "Test only with Python 3.7") -class TestMemoryUsage(unittest.TestCase): - - @staticmethod - def check_memory_profile(output): - """Check the output of a memory memory profile run on a function.""" - mem_usage = [] - func_num = 0 - for 
line in output.split('\n'): - parts = line.split() - if 'def' in parts: - func_num += 1 - if not parts or not parts[0].isdigit() or len(parts) == 1 \ - or not parts[1].replace('.', '').isdigit(): - continue - mem_usage.append(decimal.Decimal(parts[1])) - - if func_num > 1: - raise ValueError("Cannot the a memory profile output of more than one function!") - return max(v - mem_usage[0] for v in mem_usage[1:]) - - @unittest.skip - def test_package_memory_usage(self): - test_dir = os.path.dirname(__file__) or '.' - cmd = [os.path.join(test_dir, 'check_memory.py'), '1'] - output = subprocess.check_output(cmd, universal_newlines=True) - package_mem = self.check_memory_profile(output) - self.assertLess(package_mem, 20) - - def test_element_tree_memory_usage(self): - test_dir = os.path.dirname(__file__) or '.' - xsd10_schema_file = os.path.join( - os.path.dirname(os.path.abspath(test_dir)), 'validators/schemas/XSD_1.0/XMLSchema.xsd' - ) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '2', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - parse_mem = self.check_memory_profile(output) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '3', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - iterparse_mem = self.check_memory_profile(output) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '4', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - lazy_iterparse_mem = self.check_memory_profile(output) - - self.assertLess(parse_mem, 2) - self.assertLessEqual(lazy_iterparse_mem, parse_mem / 2) - self.assertLessEqual(lazy_iterparse_mem, iterparse_mem) - - def test_decode_memory_usage(self): - test_dir = os.path.dirname(__file__) or '.' 
- xsd10_schema_file = os.path.join( - os.path.dirname(os.path.abspath(test_dir)), 'validators/schemas/XSD_1.0/XMLSchema.xsd' - ) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '5', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - decode_mem = self.check_memory_profile(output) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '6', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - lazy_decode_mem = self.check_memory_profile(output) - - self.assertLess(decode_mem, 2) - self.assertLessEqual(lazy_decode_mem, decode_mem / decimal.Decimal(1.5)) - - def test_validate_memory_usage(self): - test_dir = os.path.dirname(__file__) or '.' - xsd10_schema_file = os.path.join( - os.path.dirname(os.path.abspath(test_dir)), 'validators/schemas/XSD_1.0/XMLSchema.xsd' - ) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '7', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - validate_mem = self.check_memory_profile(output) - - cmd = [os.path.join(test_dir, 'check_memory.py'), '8', xsd10_schema_file] - output = subprocess.check_output(cmd, universal_newlines=True) - lazy_validate_mem = self.check_memory_profile(output) - - self.assertLess(validate_mem, 2) - self.assertLessEqual(lazy_validate_mem, validate_mem / 2) - - -@unittest.skipIf(platform.system() == 'Windows', "Skip packaging test on Windows platform.") class TestPackaging(unittest.TestCase): @classmethod diff --git a/xmlschema/tests/test_regex.py b/xmlschema/tests/test_regex.py index c499644..b5bb6c8 100644 --- a/xmlschema/tests/test_regex.py +++ b/xmlschema/tests/test_regex.py @@ -16,6 +16,7 @@ from __future__ import unicode_literals import unittest import sys import re +from itertools import chain from unicodedata import category from xmlschema.exceptions import XMLSchemaValueError, XMLSchemaRegexError @@ -94,6 +95,19 @@ class TestUnicodeSubset(unittest.TestCase): cds.add((0, 10)) 
self.assertEqual(list(cds.complement()), [(12, 50), (51, 90), (91, sys.maxunicode + 1)]) + cds1 = UnicodeSubset(chain( + UNICODE_CATEGORIES['L'].code_points, + UNICODE_CATEGORIES['M'].code_points, + UNICODE_CATEGORIES['N'].code_points, + UNICODE_CATEGORIES['S'].code_points + )) + cds2 = UnicodeSubset(chain( + UNICODE_CATEGORIES['C'].code_points, + UNICODE_CATEGORIES['P'].code_points, + UNICODE_CATEGORIES['Z'].code_points + )) + self.assertListEqual(cds1.code_points, UnicodeSubset(cds2.complement()).code_points) + def test_union_and_intersection(self): cds1 = UnicodeSubset([50, (90, 200), 10]) cds2 = UnicodeSubset([10, 51, (89, 150), 90]) @@ -132,14 +146,14 @@ class TestUnicodeCategories(unittest.TestCase): self.assertEqual(min([min(s) for s in categories.values()]), 0) self.assertEqual(max([max(s) for s in categories.values()]), sys.maxunicode) base_sets = [set(v) for k, v in categories.items() if len(k) > 1] - self.assertFalse(any([s.intersection(t) for s in base_sets for t in base_sets if s != t])) + self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t)) def test_unicode_categories(self): self.assertEqual(sum(len(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1), sys.maxunicode + 1) self.assertEqual(min([min(s) for s in UNICODE_CATEGORIES.values()]), 0) self.assertEqual(max([max(s) for s in UNICODE_CATEGORIES.values()]), sys.maxunicode) base_sets = [set(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1] - self.assertFalse(any([s.intersection(t) for s in base_sets for t in base_sets if s != t])) + self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t)) @unittest.skipIf(not ((3, 7) <= sys.version_info < (3, 8)), "Test only for Python 3.7") def test_unicodedata_category(self): @@ -336,35 +350,50 @@ class TestPatterns(unittest.TestCase): pattern = re.compile(regex) self.assertEqual(pattern.search('x11').group(0), 'x11') self.assertIsNone(pattern.search('3a')) - + regex = 
get_python_regex(r"\w*") pattern = re.compile(regex) self.assertEqual(pattern.search('aA_x7').group(0), 'aA_x7') self.assertIsNone(pattern.search('.')) self.assertIsNone(pattern.search('-')) - + regex = get_python_regex(r"\W*") pattern = re.compile(regex) self.assertIsNone(pattern.search('aA_x7')) self.assertEqual(pattern.search('.-').group(0), '.-') - + regex = get_python_regex(r"\d*") pattern = re.compile(regex) self.assertEqual(pattern.search('6410').group(0), '6410') self.assertIsNone(pattern.search('a')) self.assertIsNone(pattern.search('-')) - + regex = get_python_regex(r"\D*") pattern = re.compile(regex) self.assertIsNone(pattern.search('6410')) self.assertEqual(pattern.search('a').group(0), 'a') self.assertEqual(pattern.search('-').group(0), '-') + # Pull Request 114 + regex = get_python_regex(r"[\w]{0,5}") + pattern = re.compile(regex) + self.assertEqual(pattern.search('abc').group(0), 'abc') + self.assertIsNone(pattern.search('.')) + + regex = get_python_regex(r"[\W]{0,5}") + pattern = re.compile(regex) + self.assertEqual(pattern.search('.').group(0), '.') + self.assertIsNone(pattern.search('abc')) + def test_empty_character_group_repr(self): regex = get_python_regex('[a-[a-f]]') self.assertEqual(regex, r'^([^\w\W])$') self.assertRaises(XMLSchemaRegexError, get_python_regex, '[]') + def test_character_class_range(self): + regex = get_python_regex('[bc-]') + self.assertEqual(regex, r'^([\-bc])$') + if __name__ == '__main__': from xmlschema.tests import print_test_header diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py index b259c5c..f5dbd5d 100644 --- a/xmlschema/tests/test_resources.py +++ b/xmlschema/tests/test_resources.py @@ -14,7 +14,6 @@ This module runs tests concerning resources. 
""" import unittest import os -import platform try: from pathlib import PureWindowsPath, PurePath @@ -25,9 +24,11 @@ from xmlschema import ( fetch_namespaces, fetch_resource, normalize_url, fetch_schema, fetch_schema_locations, load_xml_resource, XMLResource, XMLSchemaURLError ) -from xmlschema.tests import XMLSchemaTestCase, SKIP_REMOTE_TESTS +from xmlschema.tests import casepath from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO -from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, is_etree_element, etree_element, py_etree_element +from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, \ + etree_element, py_etree_element +from xmlschema.helpers import is_etree_element def is_windows_path(path): @@ -39,7 +40,17 @@ def add_leading_slash(path): return '/' + path if path and path[0] not in ('/', '\\') else path -class TestResources(XMLSchemaTestCase): +class TestResources(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.vh_dir = casepath('examples/vehicles') + cls.vh_xsd_file = casepath('examples/vehicles/vehicles.xsd') + cls.vh_xml_file = casepath('examples/vehicles/vehicles.xml') + + cls.col_dir = casepath('examples/collection') + cls.col_xsd_file = casepath('examples/collection/collection.xsd') + cls.col_xml_file = casepath('examples/collection/collection.xml') def check_url(self, url, expected): url_parts = urlsplit(url) @@ -108,13 +119,13 @@ class TestResources(XMLSchemaTestCase): self.assertEqual(normalize_url('dir2/schema.xsd', '////root/dir1'), 'file:///root/dir1/dir2/schema.xsd') def test_fetch_resource(self): - wrong_path = self.casepath('resources/dummy_file.txt') + wrong_path = casepath('resources/dummy_file.txt') self.assertRaises(XMLSchemaURLError, fetch_resource, wrong_path) - right_path = self.casepath('resources/dummy file.txt') + right_path = casepath('resources/dummy file.txt') self.assertTrue(fetch_resource(right_path).endswith('dummy file.txt')) def test_fetch_namespaces(self): - 
self.assertFalse(fetch_namespaces(self.casepath('resources/malformed.xml'))) + self.assertFalse(fetch_namespaces(casepath('resources/malformed.xml'))) def test_fetch_schema_locations(self): locations = fetch_schema_locations(self.col_xml_file) @@ -301,15 +312,15 @@ class TestResources(XMLSchemaTestCase): resource = XMLResource(self.vh_xml_file, defuse='always') self.assertIsInstance(resource.root, py_etree_element) - xml_file = self.casepath('resources/with_entity.xml') + xml_file = casepath('resources/with_entity.xml') self.assertIsInstance(XMLResource(xml_file), XMLResource) self.assertRaises(PyElementTree.ParseError, XMLResource, xml_file, defuse='always') - xml_file = self.casepath('resources/unused_external_entity.xml') + xml_file = casepath('resources/unused_external_entity.xml') self.assertIsInstance(XMLResource(xml_file), XMLResource) self.assertRaises(PyElementTree.ParseError, XMLResource, xml_file, defuse='always') - xml_file = self.casepath('resources/external_entity.xml') + xml_file = casepath('resources/external_entity.xml') self.assertIsInstance(XMLResource(xml_file), XMLResource) self.assertRaises(PyElementTree.ParseError, XMLResource, xml_file, defuse='always') @@ -430,7 +441,7 @@ class TestResources(XMLSchemaTestCase): self.assertEqual(set(resource.get_namespaces().keys()), {'vh', 'xsi'}) self.assertFalse(schema_file.closed) - + if __name__ == '__main__': from xmlschema.tests import print_test_header diff --git a/xmlschema/tests/test_schemas.py b/xmlschema/tests/test_schemas.py index 2f86602..a287501 100644 --- a/xmlschema/tests/test_schemas.py +++ b/xmlschema/tests/test_schemas.py @@ -10,701 +10,23 @@ # @author Davide Brunato # """ -This module runs tests concerning the building of XSD schemas with the 'xmlschema' package. +Loads and runs tests concerning the building of XSD schemas with the 'xmlschema' package. 
""" -from __future__ import print_function, unicode_literals -import unittest -import pdb -import os -import pickle -import time -import warnings - -import xmlschema -from xmlschema import XMLSchemaBase, XMLSchemaParseError, XMLSchemaModelError, \ - XMLSchemaIncludeWarning, XMLSchemaImportWarning -from xmlschema.compat import PY3, unicode_type -from xmlschema.etree import lxml_etree, etree_element, py_etree_element -from xmlschema.qnames import XSD_LIST, XSD_UNION, XSD_ELEMENT, XSI_TYPE -from xmlschema.tests import tests_factory, SchemaObserver, XMLSchemaTestCase -from xmlschema.validators import XsdValidator, XMLSchema11 -from xmlschema.xpath import ElementPathContext - - -class TestXMLSchema10(XMLSchemaTestCase): - - def check_schema(self, source, expected=None, **kwargs): - """ - Create a schema for a test case. - - :param source: A relative path or a root Element or a portion of schema for a template. - :param expected: If it's an Exception class test the schema for raise an error. \ - Otherwise build the schema and test a condition if expected is a callable, or make \ - a substring test if it's not `None` (maybe a string). Then returns the schema instance. 
- """ - if isinstance(expected, type) and issubclass(expected, Exception): - self.assertRaises(expected, self.schema_class, self.retrieve_schema_source(source), **kwargs) - else: - schema = self.schema_class(self.retrieve_schema_source(source), **kwargs) - if callable(expected): - self.assertTrue(expected(schema)) - return schema - - def check_complex_restriction(self, base, restriction, expected=None, **kwargs): - content = 'complex' if self.content_pattern.search(base) else 'simple' - source = """ - - {0} - - - <{1}Content> - - {2} - - - - """.format(base.strip(), content, restriction.strip()) - self.check_schema(source, expected, **kwargs) - - def test_schema_copy(self): - schema = self.vh_schema.copy() - self.assertNotEqual(id(self.vh_schema), id(schema)) - self.assertNotEqual(id(self.vh_schema.namespaces), id(schema.namespaces)) - self.assertNotEqual(id(self.vh_schema.maps), id(schema.maps)) - - def test_resolve_qname(self): - schema = self.schema_class(""" - - - """) - self.assertEqual(schema.resolve_qname('xs:element'), XSD_ELEMENT) - self.assertEqual(schema.resolve_qname('xsi:type'), XSI_TYPE) - - self.assertEqual(schema.resolve_qname(XSI_TYPE), XSI_TYPE) - self.assertEqual(schema.resolve_qname('element'), 'element') - self.assertRaises(ValueError, schema.resolve_qname, '') - self.assertRaises(ValueError, schema.resolve_qname, 'xsi:a type ') - self.assertRaises(ValueError, schema.resolve_qname, 'xml::lang') - - def test_simple_types(self): - # Issue #54: set list or union schema element. 
- xs = self.check_schema(""" - - - - - - - - - """) - xs.types['test_list'].elem = xs.root[0] # elem.tag == 'simpleType' - self.assertEqual(xs.types['test_list'].elem.tag, XSD_LIST) - xs.types['test_union'].elem = xs.root[1] # elem.tag == 'simpleType' - self.assertEqual(xs.types['test_union'].elem.tag, XSD_UNION) - - def test_wrong_includes_and_imports(self): - - with warnings.catch_warnings(record=True) as context: - warnings.simplefilter("always") - self.check_schema(""" - - - - - - """) - self.assertEqual(len(context), 3, "Wrong number of include/import warnings") - self.assertEqual(context[0].category, XMLSchemaIncludeWarning) - self.assertEqual(context[1].category, XMLSchemaIncludeWarning) - self.assertEqual(context[2].category, XMLSchemaImportWarning) - self.assertTrue(str(context[0].message).startswith("Include")) - self.assertTrue(str(context[1].message).startswith("Redefine")) - self.assertTrue(str(context[2].message).startswith("Namespace import")) - - def test_wrong_references(self): - # Wrong namespace for element type's reference - self.check_schema(""" - - - - - """, XMLSchemaParseError) - - def test_restriction_has_annotation(self): - # Wrong namespace for element type's reference - schema = self.check_schema(""" - - - stuff - - - - - """) - self.assertIsNotNone(schema.types["Magic"].annotation) - - def test_facets(self): - # Issue #55 and a near error (derivation from xs:integer) - self.check_schema(""" - - - - - - - - - - - - - """) - self.check_schema(""" - - - - - - - """, xmlschema.XMLSchemaParseError) - - # Issue #56 - self.check_schema(""" - - - - - - - - - - - """) - - def test_element_restrictions(self): - base = """ - - - - - - """ - self.check_complex_restriction( - base, restriction=""" - - - - - - """) - - self.check_complex_restriction( - base, restriction=""" - - - - - - """, expected=XMLSchemaParseError) - - self.check_complex_restriction( - base, restriction=""" - - - - - - """, expected=XMLSchemaParseError) - - 
self.check_complex_restriction( - base, restriction=""" - - - - - - """, expected=XMLSchemaParseError) - - self.check_complex_restriction( - base, restriction=""" - - - - - - """, expected=XMLSchemaParseError) - - def test_sequence_group_restriction(self): - # Meaningless sequence group - base = """ - - - - - - - """ - self.check_complex_restriction( - base, '' - ) - self.check_complex_restriction( - base, '', XMLSchemaParseError - ) - - base = """ - - - - - """ - self.check_complex_restriction(base, '') - self.check_complex_restriction(base, '', XMLSchemaParseError) - self.check_complex_restriction(base, '', XMLSchemaParseError) - self.check_complex_restriction( - base, '' - ) - self.check_complex_restriction( - base, '', XMLSchemaParseError - ) - self.check_complex_restriction( - base, '', - XMLSchemaParseError - ) - self.check_complex_restriction( - base, '', - XMLSchemaParseError - ) - - def test_all_group_restriction(self): - base = """ - - - - - - """ - self.check_complex_restriction(base, '') - self.check_complex_restriction( - base, '', XMLSchemaParseError - ) - self.check_complex_restriction( - base, '' - ) - self.check_complex_restriction( - base, '', - ) - self.check_complex_restriction( - base, '', - XMLSchemaParseError - ) - self.check_complex_restriction( - base, '', XMLSchemaParseError - ) - - base = """ - - - - """ - self.check_complex_restriction(base, '', XMLSchemaParseError) - - def test_choice_group_restriction(self): - base = """ - - - - - - """ - self.check_complex_restriction(base, '') - self.check_complex_restriction( - base, '', - XMLSchemaParseError - ) - - self.check_complex_restriction( - base, '', - ) - - def test_occurs_restriction(self): - base = """ - - - - """ - self.check_complex_restriction( - base, '') - self.check_complex_restriction( - base, '') - self.check_complex_restriction( - base, '', - XMLSchemaParseError - ) - self.check_complex_restriction( - base, '', - XMLSchemaParseError - ) - - def test_union_restrictions(self): - 
# Wrong union restriction (not admitted facets, see issue #67) - self.check_schema(r""" - - - - - - - - - - - - - - - """, XMLSchemaParseError) - - def test_final_attribute(self): - self.check_schema(""" - - - - """) - - def test_wrong_attribute(self): - self.check_schema(""" - - - - - """, XMLSchemaParseError) - - def test_wrong_attribute_group(self): - self.check_schema(""" - - - - - """, XMLSchemaParseError) - schema = self.check_schema(""" - - - - - """, validation='lax') - self.assertTrue(isinstance(schema.all_errors[1], XMLSchemaParseError)) - - def test_date_time_facets(self): - self.check_schema(""" - - - - - - """) - - self.check_schema(""" - - - - - - """) - - def test_base_schemas(self): - from xmlschema.validators.schema import XML_SCHEMA_FILE - self.schema_class(XML_SCHEMA_FILE) - - def test_recursive_complex_type(self): - schema = self.schema_class(""" - - - - - - - - """) - self.assertEqual(schema.elements['elemA'].type, schema.types['typeA']) - - def test_upa_violations(self): - self.check_schema(""" - - - - - - - - - """, XMLSchemaModelError) - - self.check_schema(""" - - - - - - - - - """) - - def test_root_elements(self): - # Test issue #107 fix - schema = self.schema_class(""" - - - - - - - - - """) - - self.assertEqual(set(schema.root_elements), {schema.elements['root1'], schema.elements['root2']}) - - def test_is_restriction_method(self): - # Test issue #111 fix - schema = self.schema_class(source=os.path.join(self.test_cases_dir, 'issues/issue_111/issue_111.xsd')) - extended_header_def = schema.types['extendedHeaderDef'] - self.assertTrue(extended_header_def.is_derived(schema.types['blockDef'])) - - -class TestXMLSchema11(TestXMLSchema10): - - schema_class = XMLSchema11 - - def test_explicit_timezone_facet(self): - schema = self.check_schema(""" - - - - - - - - - - - - - - - - """) - self.assertTrue(schema.types['req-tz-date'].is_valid('2002-10-10-05:00')) - self.assertTrue(schema.types['req-tz-date'].is_valid('2002-10-10Z')) - 
self.assertFalse(schema.types['req-tz-date'].is_valid('2002-10-10')) - - def test_assertion_facet(self): - self.check_schema(""" - - - - - """, XMLSchemaParseError) - - schema = self.check_schema(""" - - - - - """) - self.assertTrue(schema.types['MeasureType'].is_valid('10')) - self.assertFalse(schema.types['MeasureType'].is_valid('-1.5')) - - self.check_schema(""" - - - - - """, XMLSchemaParseError) - - schema = self.check_schema(""" - - - - - """) - self.assertTrue(schema.types['RestrictedDateTimeType'].is_valid('2000-01-01T12:00:00')) - - schema = self.check_schema(""" - - - - - """) - self.assertTrue(schema.types['Percentage'].is_valid('10')) - self.assertTrue(schema.types['Percentage'].is_valid('100')) - self.assertTrue(schema.types['Percentage'].is_valid('0')) - self.assertFalse(schema.types['Percentage'].is_valid('-1')) - self.assertFalse(schema.types['Percentage'].is_valid('101')) - self.assertFalse(schema.types['Percentage'].is_valid('90.1')) - - def test_complex_type_assertion(self): - schema = self.check_schema(""" - - - - - """) - - xsd_type = schema.types['intRange'] - xsd_type.decode(etree_element('a', attrib={'min': '10', 'max': '19'})) - self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '10', 'max': '19'}))) - self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '19', 'max': '19'}))) - self.assertFalse(xsd_type.is_valid(etree_element('a', attrib={'min': '25', 'max': '19'}))) - self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '25', 'max': '100'}))) - - def test_open_content(self): - self.check_schema(""" - - - - - - - - - - - - - - """) - - -def make_schema_test_class(test_file, test_args, test_num, schema_class, check_with_lxml): - """ - Creates a schema test class. - - :param test_file: the schema test file path. - :param test_args: line arguments for test case. - :param test_num: a positive integer number associated with the test case. - :param schema_class: the schema class to use. 
- :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \ - Works only for XSD 1.0 tests. - """ - xsd_file = os.path.relpath(test_file) - - # Extract schema test arguments - expected_errors = test_args.errors - expected_warnings = test_args.warnings - inspect = test_args.inspect - locations = test_args.locations - defuse = test_args.defuse - debug_mode = test_args.debug - - class TestSchema(XMLSchemaTestCase): - - @classmethod - def setUpClass(cls): - cls.schema_class = schema_class - cls.errors = [] - cls.longMessage = True - - if debug_mode: - print("\n##\n## Testing %r schema in debug mode.\n##" % xsd_file) - pdb.set_trace() - - def check_schema(self): - if expected_errors > 0: - xs = schema_class(xsd_file, validation='lax', locations=locations, defuse=defuse) - else: - xs = schema_class(xsd_file, locations=locations, defuse=defuse) - self.errors.extend(xs.maps.all_errors) - - if inspect: - components_ids = set([id(c) for c in xs.maps.iter_components()]) - missing = [c for c in SchemaObserver.components if id(c) not in components_ids] - if any([c for c in missing]): - raise ValueError("schema missing %d components: %r" % (len(missing), missing)) - - # Pickling test (only for Python 3, skip inspected schema classes test) - if not inspect and PY3: - try: - obj = pickle.dumps(xs) - deserialized_schema = pickle.loads(obj) - except pickle.PicklingError: - # Don't raise if some schema parts (eg. a schema loaded from remote) - # are built with the SafeXMLParser that uses pure Python elements. 
- for e in xs.maps.iter_components(): - elem = getattr(e, 'elem', getattr(e, 'root', None)) - if isinstance(elem, py_etree_element): - break - else: - raise - else: - self.assertTrue(isinstance(deserialized_schema, XMLSchemaBase)) - self.assertEqual(xs.built, deserialized_schema.built) - - # XPath API tests - if not inspect and not self.errors: - context = ElementPathContext(xs) - elements = [x for x in xs.iter()] - context_elements = [x for x in context.iter() if isinstance(x, XsdValidator)] - self.assertEqual(context_elements, [x for x in context.iter_descendants()]) - self.assertEqual(context_elements, elements) - - def check_lxml_schema(self, xmlschema_time): - start_time = time.time() - lxs = lxml_etree.parse(xsd_file) - try: - lxml_etree.XMLSchema(lxs.getroot()) - except lxml_etree.XMLSchemaParseError as err: - if not self.errors: - print("\nSchema error with lxml.etree.XMLSchema for file {!r} ({}): {}".format( - xsd_file, self.__class__.__name__, unicode_type(err) - )) - else: - if self.errors: - print("\nUnrecognized errors with lxml.etree.XMLSchema for file {!r} ({}): {}".format( - xsd_file, self.__class__.__name__, - '\n++++++\n'.join([unicode_type(e) for e in self.errors]) - )) - lxml_schema_time = time.time() - start_time - if lxml_schema_time >= xmlschema_time: - print( - "\nSlower lxml.etree.XMLSchema ({:.3f}s VS {:.3f}s) with file {!r} ({})".format( - lxml_schema_time, xmlschema_time, xsd_file, self.__class__.__name__ - )) - - def test_xsd_schema(self): - if inspect: - SchemaObserver.clear() - del self.errors[:] - - start_time = time.time() - if expected_warnings > 0: - with warnings.catch_warnings(record=True) as ctx: - warnings.simplefilter("always") - self.check_schema() - self.assertEqual(len(ctx), expected_warnings, - "%r: Wrong number of include/import warnings" % xsd_file) - else: - self.check_schema() - - # Check with lxml.etree.XMLSchema class - if check_with_lxml and lxml_etree is not None: - 
self.check_lxml_schema(xmlschema_time=time.time() - start_time) - self.check_errors(xsd_file, expected_errors) - - TestSchema.__name__ = TestSchema.__qualname__ = str('TestSchema{0:03}'.format(test_num)) - return TestSchema - - -# Creates schema tests from XSD files -globals().update(tests_factory(make_schema_test_class, 'xsd')) - - if __name__ == '__main__': + import unittest + import os + from xmlschema.tests import print_test_header + from xmlschema.tests.test_factory import tests_factory, make_schema_test_class + + def load_tests(loader, tests, pattern): + validators_dir = os.path.join(os.path.dirname(__file__), 'validators') + validators_tests = loader.discover(start_dir=validators_dir, pattern=pattern or '*') + tests.addTests(validators_tests) + return tests + + # Creates schema tests from XSD files + globals().update(tests_factory(make_schema_test_class, 'xsd')) print_test_header() unittest.main() diff --git a/xmlschema/tests/test_validators.py b/xmlschema/tests/test_validators.py index efdc5ca..5ad6a69 100644 --- a/xmlschema/tests/test_validators.py +++ b/xmlschema/tests/test_validators.py @@ -10,1439 +10,23 @@ # @author Davide Brunato # """ -This module runs tests concerning the validation/decoding/encoding of XML files. +Loads and runs tests concerning the validation/decoding/encoding of XML files. 
""" -import unittest -import pdb -import os -import sys -import pickle -from decimal import Decimal -import base64 -import warnings -from elementpath import datatypes - -import xmlschema -from xmlschema import ( - XMLSchemaEncodeError, XMLSchemaValidationError, ParkerConverter, - BadgerFishConverter, AbderaConverter, JsonMLConverter -) -from xmlschema.converters import UnorderedConverter -from xmlschema.compat import unicode_type, ordered_dict_class -from xmlschema.etree import etree_element, etree_tostring, is_etree_element, ElementTree, \ - etree_elements_assert_equal, lxml_etree, lxml_etree_element -from xmlschema.exceptions import XMLSchemaValueError -from xmlschema.validators.exceptions import XMLSchemaChildrenValidationError -from xmlschema.helpers import local_name -from xmlschema.qnames import XSI_TYPE -from xmlschema.resources import fetch_namespaces -from xmlschema.tests import XMLSchemaTestCase, tests_factory -from xmlschema.validators import XMLSchema11 - -_VEHICLES_DICT = { - '@xmlns:vh': 'http://example.com/vehicles', - '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - '@xsi:schemaLocation': 'http://example.com/vehicles vehicles.xsd', - 'vh:cars': { - 'vh:car': [ - {'@make': 'Porsche', '@model': '911'}, - {'@make': 'Porsche', '@model': '911'} - ]}, - 'vh:bikes': { - 'vh:bike': [ - {'@make': 'Harley-Davidson', '@model': 'WL'}, - {'@make': 'Yamaha', '@model': 'XS650'} - ]} -} - -_VEHICLES_DICT_ALT = [ - {'vh:cars': [ - {'vh:car': None, '@make': 'Porsche', '@model': '911'}, - {'vh:car': None, '@make': 'Porsche', '@model': '911'} - ]}, - {'vh:bikes': [ - {'vh:bike': None, '@make': 'Harley-Davidson', '@model': 'WL'}, - {'vh:bike': None, '@make': 'Yamaha', '@model': 'XS650'} - ]}, - {'@xsi:schemaLocation': 'http://example.com/vehicles vehicles.xsd'} -] - -_COLLECTION_DICT = { - '@xmlns:col': 'http://example.com/ns/collection', - '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - '@xsi:schemaLocation': 'http://example.com/ns/collection 
collection.xsd', - 'object': [{ - '@available': True, - '@id': 'b0836217462', - 'author': { - '@id': 'PAR', - 'born': '1841-02-25', - 'dead': '1919-12-03', - 'name': 'Pierre-Auguste Renoir', - 'qualification': 'painter' - }, - 'estimation': Decimal('10000.00'), - 'position': 1, - 'title': 'The Umbrellas', - 'year': '1886'}, - { - '@available': True, - '@id': 'b0836217463', - 'author': { - '@id': 'JM', - 'born': '1893-04-20', - 'dead': '1983-12-25', - 'name': u'Joan Miró', - 'qualification': 'painter, sculptor and ceramicist' - }, - 'position': 2, - 'title': None, - 'year': '1925' - }] -} - -_COLLECTION_PARKER = { - 'object': [{'author': {'born': '1841-02-25', - 'dead': '1919-12-03', - 'name': 'Pierre-Auguste Renoir', - 'qualification': 'painter'}, - 'estimation': 10000.0, - 'position': 1, - 'title': 'The Umbrellas', - 'year': '1886'}, - {'author': {'born': '1893-04-20', - 'dead': '1983-12-25', - 'name': u'Joan Miró', - 'qualification': 'painter, sculptor and ceramicist'}, - 'position': 2, - 'title': None, - 'year': '1925'}]} - -_COLLECTION_PARKER_ROOT = { - 'col:collection': {'object': [{'author': {'born': '1841-02-25', - 'dead': '1919-12-03', - 'name': 'Pierre-Auguste Renoir', - 'qualification': 'painter'}, - 'estimation': 10000.0, - 'position': 1, - 'title': 'The Umbrellas', - 'year': '1886'}, - {'author': {'born': '1893-04-20', - 'dead': '1983-12-25', - 'name': u'Joan Miró', - 'qualification': 'painter, sculptor and ceramicist'}, - 'position': 2, - 'title': None, - 'year': '1925'}]}} - -_COLLECTION_BADGERFISH = { - '@xmlns': { - 'col': 'http://example.com/ns/collection', - 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}, - 'col:collection': { - '@xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd', - 'object': [{ - '@available': True, - '@id': 'b0836217462', - 'author': { - '@id': 'PAR', - 'born': {'$': '1841-02-25'}, - 'dead': {'$': '1919-12-03'}, - 'name': {'$': 'Pierre-Auguste Renoir'}, - 'qualification': {'$': 'painter'}}, - 
'estimation': {'$': 10000.0}, - 'position': {'$': 1}, - 'title': {'$': 'The Umbrellas'}, - 'year': {'$': '1886'}}, - { - '@available': True, - '@id': 'b0836217463', - 'author': { - '@id': 'JM', - 'born': {'$': '1893-04-20'}, - 'dead': {'$': '1983-12-25'}, - 'name': {'$': u'Joan Miró'}, - 'qualification': { - '$': 'painter, sculptor and ceramicist'} - }, - 'position': {'$': 2}, - 'title': {}, - 'year': {'$': '1925'} - }] - } -} - -_COLLECTION_ABDERA = { - 'attributes': { - 'xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd' - }, - 'children': [ - { - 'object': [ - { - 'attributes': {'available': True, 'id': 'b0836217462'}, - 'children': [{ - 'author': { - 'attributes': {'id': 'PAR'}, - 'children': [{ - 'born': '1841-02-25', - 'dead': '1919-12-03', - 'name': 'Pierre-Auguste Renoir', - 'qualification': 'painter'} - ]}, - 'estimation': 10000.0, - 'position': 1, - 'title': 'The Umbrellas', - 'year': '1886'} - ]}, - { - 'attributes': {'available': True, 'id': 'b0836217463'}, - 'children': [{ - 'author': { - 'attributes': {'id': 'JM'}, - 'children': [{ - 'born': '1893-04-20', - 'dead': '1983-12-25', - 'name': u'Joan Miró', - 'qualification': 'painter, sculptor and ceramicist'} - ]}, - 'position': 2, - 'title': [], - 'year': '1925' - }] - }] - } - ]} - -_COLLECTION_JSON_ML = [ - 'col:collection', - {'xmlns:col': 'http://example.com/ns/collection', - 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd'}, - ['object', - {'available': True, 'id': 'b0836217462'}, - ['position', 1], - ['title', 'The Umbrellas'], - ['year', '1886'], - [ - 'author', - {'id': 'PAR'}, - ['name', 'Pierre-Auguste Renoir'], - ['born', '1841-02-25'], - ['dead', '1919-12-03'], - ['qualification', 'painter'] - ], - [ - 'estimation', - Decimal('10000.00') - ]], - ['object', - {'available': True, 'id': 'b0836217463'}, - ['position', 2], - ['title'], - ['year', '1925'], - [ - 'author', - {'id': 'JM'}, - 
['name', u'Joan Miró'], - ['born', '1893-04-20'], - ['dead', '1983-12-25'], - ['qualification', 'painter, sculptor and ceramicist'] - ]] -] - -_DATA_DICT = { - '@xmlns:ns': 'ns', - '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - '@xsi:schemaLocation': 'ns ./simple-types.xsd', - 'certification': [ - {'$': 'ISO-9001', '@Year': 1999}, - {'$': 'ISO-27001', '@Year': 2009} - ], - 'decimal_value': [Decimal('1')], - u'menù': u'baccalà mantecato', - u'complex_boolean': [ - {'$': True, '@Type': 2}, {'$': False, '@Type': 1}, True, False - ], - u'simple_boolean': [True, False] -} - - -def iter_nested_items(items, dict_class=dict, list_class=list): - if isinstance(items, dict_class): - for k, v in items.items(): - for value in iter_nested_items(v, dict_class, list_class): - yield value - elif isinstance(items, list_class): - for item in items: - for value in iter_nested_items(item, dict_class, list_class): - yield value - elif isinstance(items, dict): - raise TypeError("%r: is a dict() instead of %r." % (items, dict_class)) - elif isinstance(items, list): - raise TypeError("%r: is a list() instead of %r." % (items, list_class)) - else: - yield items - - -def make_validator_test_class(test_file, test_args, test_num, schema_class, check_with_lxml): - """ - Creates a validator test class. - - :param test_file: the XML test file path. - :param test_args: line arguments for test case. - :param test_num: a positive integer number associated with the test case. - :param schema_class: the schema class to use. - :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \ - Works only for XSD 1.0 tests. 
- """ - xml_file = os.path.relpath(test_file) - msg_tmpl = "\n\n{}: %s.".format(xml_file) - - # Extract schema test arguments - expected_errors = test_args.errors - expected_warnings = test_args.warnings - inspect = test_args.inspect - locations = test_args.locations - defuse = test_args.defuse - skip_strict = test_args.skip - debug_mode = test_args.debug - - class TestValidator(XMLSchemaTestCase): - - @classmethod - def setUpClass(cls): - # Builds schema instance using 'lax' validation mode to accepts also schemas with not crashing errors. - cls.schema_class = schema_class - source, _locations = xmlschema.fetch_schema_locations(xml_file, locations) - cls.schema = schema_class(source, validation='lax', locations=_locations, defuse=defuse) - if check_with_lxml and lxml_etree is not None: - cls.lxml_schema = lxml_etree.parse(source) - - cls.errors = [] - cls.chunks = [] - cls.longMessage = True - - if debug_mode: - print("\n##\n## Testing %r validation in debug mode.\n##" % xml_file) - pdb.set_trace() - - def check_etree_encode(self, root, converter=None, **kwargs): - data1 = self.schema.decode(root, converter=converter, **kwargs) - if isinstance(data1, tuple): - data1 = data1[0] # When validation='lax' - - for _ in iter_nested_items(data1, dict_class=ordered_dict_class): - pass - - elem1 = self.schema.encode(data1, path=root.tag, converter=converter, **kwargs) - if isinstance(elem1, tuple): - # When validation='lax' - if converter is not ParkerConverter: - for e in elem1[1]: - self.check_namespace_prefixes(unicode_type(e)) - elem1 = elem1[0] - - # Checks the encoded element to not contains reserved namespace prefixes - if 'namespaces' in kwargs and all('ns%d' % k not in kwargs['namespaces'] for k in range(10)): - self.check_namespace_prefixes(etree_tostring(elem1, namespaces=kwargs['namespaces'])) - - # Main check: compare original a re encoded tree - try: - etree_elements_assert_equal(root, elem1, strict=False) - except AssertionError as err: - # If the check fails 
retry only if the converter is lossy (eg. ParkerConverter) - # or if the XML case has defaults taken from the schema or some part of data - # decoding is skipped by schema wildcards (set the specific argument in testfiles). - if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict: - if debug_mode: - pdb.set_trace() - raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original") - elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): - return # can't check encode equivalence if xsi:type is provided - else: - # Lossy or augmenting cases are checked after a re decoding-encoding pass - data2 = self.schema.decode(elem1, converter=converter, **kwargs) - if isinstance(data2, tuple): - data2 = data2[0] - - if sys.version_info >= (3, 6): - # For Python < 3.6 cannot ensure attribute decoding order - try: - self.assertEqual(data1, data2, msg_tmpl % "re decoded data changed") - except AssertionError: - if debug_mode: - pdb.set_trace() - raise - - elem2 = self.schema.encode(data2, path=root.tag, converter=converter, **kwargs) - if isinstance(elem2, tuple): - elem2 = elem2[0] - - try: - etree_elements_assert_equal(elem1, elem2, strict=False) - except AssertionError as err: - if debug_mode: - pdb.set_trace() - raise AssertionError(str(err) + msg_tmpl % "encoded tree differs after second pass") - - def check_json_serialization(self, root, converter=None, **kwargs): - data1 = xmlschema.to_json(root, schema=self.schema, converter=converter, **kwargs) - if isinstance(data1, tuple): - data1 = data1[0] - - elem1 = xmlschema.from_json(data1, schema=self.schema, path=root.tag, converter=converter, **kwargs) - if isinstance(elem1, tuple): - elem1 = elem1[0] - - data2 = xmlschema.to_json(elem1, schema=self.schema, converter=converter, **kwargs) - if isinstance(data2, tuple): - data2 = data2[0] - - if converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): - return # can't check 
encode equivalence if xsi:type is provided - elif sys.version_info >= (3, 6): - self.assertEqual(data2, data1, msg_tmpl % "serialized data changed at second pass") - else: - elem2 = xmlschema.from_json(data2, schema=self.schema, path=root.tag, converter=converter, **kwargs) - if isinstance(elem2, tuple): - elem2 = elem2[0] - try: - self.assertIsNone(etree_elements_assert_equal(elem1, elem2, strict=False, skip_comments=True)) - except AssertionError as err: - self.assertIsNone(err, None) - - def check_decoding_with_element_tree(self): - del self.errors[:] - del self.chunks[:] - - def do_decoding(): - for obj in self.schema.iter_decode(xml_file): - if isinstance(obj, (xmlschema.XMLSchemaDecodeError, xmlschema.XMLSchemaValidationError)): - self.errors.append(obj) - else: - self.chunks.append(obj) - - if expected_warnings == 0: - do_decoding() - else: - with warnings.catch_warnings(record=True) as ctx: - warnings.simplefilter("always") - do_decoding() - self.assertEqual(len(ctx), expected_warnings, "Wrong number of include/import warnings") - - self.check_errors(xml_file, expected_errors) - - if not self.chunks: - raise ValueError("No decoded object returned!!") - elif len(self.chunks) > 1: - raise ValueError("Too many ({}) decoded objects returned: {}".format(len(self.chunks), self.chunks)) - elif not isinstance(self.chunks[0], dict): - raise ValueError("Decoded object is not a dictionary: {}".format(self.chunks)) - else: - self.assertTrue(True, "Successfully test decoding for {}".format(xml_file)) - - def check_schema_serialization(self): - # Repeat with serialized-deserialized schema (only for Python 3) - serialized_schema = pickle.dumps(self.schema) - deserialized_schema = pickle.loads(serialized_schema) - errors = [] - chunks = [] - for obj in deserialized_schema.iter_decode(xml_file): - if isinstance(obj, xmlschema.XMLSchemaValidationError): - errors.append(obj) - else: - chunks.append(obj) - - self.assertEqual(len(errors), len(self.errors), msg_tmpl % "wrong 
number errors") - self.assertEqual(chunks, self.chunks, msg_tmpl % "decoded data differ") - - def check_decode_api(self): - # Compare with the decode API and other validation modes - strict_data = self.schema.decode(xml_file) - lax_data = self.schema.decode(xml_file, validation='lax') - skip_data = self.schema.decode(xml_file, validation='skip') - self.assertEqual(strict_data, self.chunks[0], msg_tmpl % "decode() API has a different result") - self.assertEqual(lax_data[0], self.chunks[0], msg_tmpl % "'lax' validation has a different result") - self.assertEqual(skip_data, self.chunks[0], msg_tmpl % "'skip' validation has a different result") - - def check_encoding_with_element_tree(self): - root = ElementTree.parse(xml_file).getroot() - namespaces = fetch_namespaces(xml_file) - options = {'namespaces': namespaces, 'dict_class': ordered_dict_class} - - self.check_etree_encode(root, cdata_prefix='#', **options) # Default converter - self.check_etree_encode(root, ParkerConverter, validation='lax', **options) - self.check_etree_encode(root, ParkerConverter, validation='skip', **options) - self.check_etree_encode(root, BadgerFishConverter, **options) - self.check_etree_encode(root, AbderaConverter, **options) - self.check_etree_encode(root, JsonMLConverter, **options) - - options.pop('dict_class') - self.check_json_serialization(root, cdata_prefix='#', **options) - self.check_json_serialization(root, ParkerConverter, validation='lax', **options) - self.check_json_serialization(root, ParkerConverter, validation='skip', **options) - self.check_json_serialization(root, BadgerFishConverter, **options) - self.check_json_serialization(root, AbderaConverter, **options) - self.check_json_serialization(root, JsonMLConverter, **options) - - def check_decoding_and_encoding_with_lxml(self): - xml_tree = lxml_etree.parse(xml_file) - namespaces = fetch_namespaces(xml_file) - errors = [] - chunks = [] - for obj in self.schema.iter_decode(xml_tree, namespaces=namespaces): - if 
isinstance(obj, xmlschema.XMLSchemaValidationError): - errors.append(obj) - else: - chunks.append(obj) - - self.assertEqual(chunks, self.chunks, msg_tmpl % "decode data change with lxml") - self.assertEqual(len(errors), len(self.errors), msg_tmpl % "errors number change with lxml") - - if not errors: - root = xml_tree.getroot() - options = { - 'etree_element_class': lxml_etree_element, - 'namespaces': namespaces, - 'dict_class': ordered_dict_class, - } - - self.check_etree_encode(root, cdata_prefix='#', **options) # Default converter - self.check_etree_encode(root, ParkerConverter, validation='lax', **options) - self.check_etree_encode(root, ParkerConverter, validation='skip', **options) - self.check_etree_encode(root, BadgerFishConverter, **options) - self.check_etree_encode(root, AbderaConverter, **options) - self.check_etree_encode(root, JsonMLConverter, **options) - - options.pop('dict_class') - self.check_json_serialization(root, cdata_prefix='#', **options) - self.check_json_serialization(root, ParkerConverter, validation='lax', **options) - self.check_json_serialization(root, ParkerConverter, validation='skip', **options) - self.check_json_serialization(root, BadgerFishConverter, **options) - self.check_json_serialization(root, AbderaConverter, **options) - self.check_json_serialization(root, JsonMLConverter, **options) - - def check_validate_and_is_valid_api(self): - if expected_errors: - self.assertFalse(self.schema.is_valid(xml_file), msg_tmpl % "file with errors is valid") - self.assertRaises(XMLSchemaValidationError, self.schema.validate, xml_file) - else: - self.assertTrue(self.schema.is_valid(xml_file), msg_tmpl % "file without errors is not valid") - self.assertEqual(self.schema.validate(xml_file), None, - msg_tmpl % "file without errors not validated") - - def check_iter_errors(self): - self.assertEqual(len(list(self.schema.iter_errors(xml_file))), expected_errors, - msg_tmpl % "wrong number of errors (%d expected)" % expected_errors) - - def 
check_lxml_validation(self): - try: - schema = lxml_etree.XMLSchema(self.lxml_schema.getroot()) - except lxml_etree.XMLSchemaParseError: - print("\nSkip lxml.etree.XMLSchema validation test for {!r} ({})". - format(xml_file, TestValidator.__name__, )) - else: - xml_tree = lxml_etree.parse(xml_file) - if self.errors: - self.assertFalse(schema.validate(xml_tree)) - else: - self.assertTrue(schema.validate(xml_tree)) - - def test_xml_document_validation(self): - self.check_decoding_with_element_tree() - - if not inspect and sys.version_info >= (3,): - self.check_schema_serialization() - - if not self.errors: - self.check_encoding_with_element_tree() - - if lxml_etree is not None: - self.check_decoding_and_encoding_with_lxml() - - self.check_iter_errors() - self.check_validate_and_is_valid_api() - if check_with_lxml and lxml_etree is not None: - self.check_lxml_validation() - - TestValidator.__name__ = TestValidator.__qualname__ = 'TestValidator{0:03}'.format(test_num) - return TestValidator - - -class TestValidation(XMLSchemaTestCase): - - def check_validity(self, xsd_component, data, expected, use_defaults=True): - if isinstance(expected, type) and issubclass(expected, Exception): - self.assertRaises(expected, xsd_component.is_valid, data, use_defaults=use_defaults) - elif expected: - self.assertTrue(xsd_component.is_valid(data, use_defaults=use_defaults)) - else: - self.assertFalse(xsd_component.is_valid(data, use_defaults=use_defaults)) - - @unittest.skipIf(lxml_etree is None, "The lxml library is not available.") - def test_lxml(self): - xs = xmlschema.XMLSchema(self.casepath('examples/vehicles/vehicles.xsd')) - xt1 = lxml_etree.parse(self.casepath('examples/vehicles/vehicles.xml')) - xt2 = lxml_etree.parse(self.casepath('examples/vehicles/vehicles-1_error.xml')) - self.assertTrue(xs.is_valid(xt1)) - self.assertFalse(xs.is_valid(xt2)) - self.assertTrue(xs.validate(xt1) is None) - self.assertRaises(xmlschema.XMLSchemaValidationError, xs.validate, xt2) - - def 
test_issue_064(self): - self.check_validity(self.st_schema, '', False) - - def test_document_validate_api(self): - self.assertIsNone(xmlschema.validate(self.vh_xml_file)) - self.assertIsNone(xmlschema.validate(self.vh_xml_file, use_defaults=False)) - - vh_2_file = self.casepath('examples/vehicles/vehicles-2_errors.xml') - self.assertRaises(XMLSchemaValidationError, xmlschema.validate, vh_2_file) - - try: - xmlschema.validate(vh_2_file, namespaces={'vhx': "http://example.com/vehicles"}) - except XMLSchemaValidationError as err: - path_line = str(err).splitlines()[-1] - else: - path_line = '' - self.assertEqual('Path: /vhx:vehicles/vhx:cars', path_line) - - # Issue #80 - vh_2_xt = ElementTree.parse(vh_2_file) - self.assertRaises(XMLSchemaValidationError, xmlschema.validate, vh_2_xt, self.vh_xsd_file) - - def _test_document_validate_api_lazy(self): - source = xmlschema.XMLResource(self.col_xml_file, lazy=True) - source.root[0].clear() - source.root[1].clear() - xsd_element = self.col_schema.elements['collection'] - - for result in xsd_element.iter_decode(source.root, 'strict', namespaces=source.get_namespaces(), - source=source, _no_deep=None): - del result - - self.assertIsNone(xmlschema.validate(self.col_xml_file, lazy=True)) - - -class TestValidation11(TestValidation): - schema_class = XMLSchema11 - - def test_default_attributes(self): - """ - Root Node - """ - xs = self.schema_class(self.casepath('features/attributes/default_attributes.xsd')) - self.assertTrue(xs.is_valid("" - " alpha" - " beta" - "")) - self.assertFalse(xs.is_valid("" - " alpha" # Misses required attribute - " beta" - "")) - - -class TestDecoding(XMLSchemaTestCase): - - def check_decode(self, xsd_component, data, expected, **kwargs): - if isinstance(expected, type) and issubclass(expected, Exception): - self.assertRaises(expected, xsd_component.decode, data, **kwargs) - else: - obj = xsd_component.decode(data, **kwargs) - if isinstance(obj, tuple) and len(obj) == 2 and isinstance(obj[1], list) \ 
- and isinstance(obj[1][0], Exception): - self.assertEqual(expected, obj[0]) - self.assertTrue(isinstance(obj[0], type(expected))) - else: - self.assertEqual(expected, obj) - self.assertTrue(isinstance(obj, type(expected))) - - @unittest.skipIf(lxml_etree is None, "The lxml library is not available.") - def test_lxml(self): - vh_xml_tree = lxml_etree.parse(self.vh_xml_file) - self.assertEqual(self.vh_schema.to_dict(vh_xml_tree), _VEHICLES_DICT) - self.assertEqual(xmlschema.to_dict(vh_xml_tree, self.vh_schema.url), _VEHICLES_DICT) - - def test_to_dict_from_etree(self): - vh_xml_tree = ElementTree.parse(self.vh_xml_file) - col_xml_tree = ElementTree.parse(self.col_xml_file) - - xml_dict = self.vh_schema.to_dict(vh_xml_tree) - self.assertNotEqual(xml_dict, _VEHICLES_DICT) - - xml_dict = self.vh_schema.to_dict(vh_xml_tree, namespaces=self.vh_namespaces) - self.assertEqual(xml_dict, _VEHICLES_DICT) - - xml_dict = xmlschema.to_dict(vh_xml_tree, self.vh_schema.url, namespaces=self.vh_namespaces) - self.assertEqual(xml_dict, _VEHICLES_DICT) - - xml_dict = self.col_schema.to_dict(col_xml_tree) - self.assertNotEqual(xml_dict, _COLLECTION_DICT) - - xml_dict = self.col_schema.to_dict(col_xml_tree, namespaces=self.col_namespaces) - self.assertEqual(xml_dict, _COLLECTION_DICT) - - xml_dict = xmlschema.to_dict(col_xml_tree, self.col_schema.url, namespaces=self.col_namespaces) - self.assertEqual(xml_dict, _COLLECTION_DICT) - - def test_to_dict_from_string(self): - with open(self.vh_xml_file) as f: - vh_xml_string = f.read() - - with open(self.col_xml_file) as f: - col_xml_string = f.read() - - xml_dict = self.vh_schema.to_dict(vh_xml_string, namespaces=self.vh_namespaces) - self.assertEqual(xml_dict, _VEHICLES_DICT) - - xml_dict = xmlschema.to_dict(vh_xml_string, self.vh_schema.url, namespaces=self.vh_namespaces) - self.assertEqual(xml_dict, _VEHICLES_DICT) - - xml_dict = self.col_schema.to_dict(col_xml_string, namespaces=self.col_namespaces) - self.assertTrue(xml_dict, 
_COLLECTION_DICT) - - xml_dict = xmlschema.to_dict(col_xml_string, self.col_schema.url, namespaces=self.col_namespaces) - self.assertTrue(xml_dict, _COLLECTION_DICT) - - def test_json_dump_and_load(self): - vh_xml_tree = ElementTree.parse(self.vh_xml_file) - col_xml_tree = ElementTree.parse(self.col_xml_file) - with open(self.vh_json_file, 'w') as f: - xmlschema.to_json(self.vh_xml_file, f) - - with open(self.vh_json_file) as f: - root = xmlschema.from_json(f, self.vh_schema) - - os.remove(self.vh_json_file) - self.check_etree_elements(vh_xml_tree, root) - - with open(self.col_json_file, 'w') as f: - xmlschema.to_json(self.col_xml_file, f) - - with open(self.col_json_file) as f: - root = xmlschema.from_json(f, self.col_schema) - - os.remove(self.col_json_file) - self.check_etree_elements(col_xml_tree, root) - - def test_path(self): - xt = ElementTree.parse(self.vh_xml_file) - xd = self.vh_schema.to_dict(xt, '/vh:vehicles/vh:cars', namespaces=self.vh_namespaces) - self.assertEqual(xd['vh:car'], _VEHICLES_DICT['vh:cars']['vh:car']) - xd = self.vh_schema.to_dict(xt, '/vh:vehicles/vh:bikes', namespaces=self.vh_namespaces) - self.assertEqual(xd['vh:bike'], _VEHICLES_DICT['vh:bikes']['vh:bike']) - - def test_validation_strict(self): - self.assertRaises( - xmlschema.XMLSchemaValidationError, - self.vh_schema.to_dict, - ElementTree.parse(self.casepath('examples/vehicles/vehicles-2_errors.xml')), - validation='strict', - namespaces=self.vh_namespaces - ) - - def test_validation_skip(self): - xt = ElementTree.parse(self.casepath('features/decoder/data3.xml')) - xd = self.st_schema.decode(xt, validation='skip', namespaces={'ns': 'ns'}) - self.assertEqual(xd['decimal_value'], ['abc']) - - def test_datatypes(self): - xt = ElementTree.parse(self.casepath('features/decoder/data.xml')) - xd = self.st_schema.to_dict(xt, namespaces=self.default_namespaces) - self.assertEqual(xd, _DATA_DICT) - - def test_datetime_types(self): - xs = self.get_schema('') - 
self.assertEqual(xs.decode('2019-01-01T13:40:00'), '2019-01-01T13:40:00') - self.assertEqual(xs.decode('2019-01-01T13:40:00', datetime_types=True), - datatypes.DateTime10.fromstring('2019-01-01T13:40:00')) - - xs = self.get_schema('') - self.assertEqual(xs.decode('2001-04-15'), '2001-04-15') - self.assertEqual(xs.decode('2001-04-15', datetime_types=True), - datatypes.Date10.fromstring('2001-04-15')) - - def test_duration_type(self): - xs = self.get_schema('') - self.assertEqual(xs.decode('P5Y3MT60H30.001S'), 'P5Y3MT60H30.001S') - self.assertEqual(xs.decode('P5Y3MT60H30.001S', datetime_types=True), - datatypes.Duration.fromstring('P5Y3M2DT12H30.001S')) - - def test_default_converter(self): - self.assertEqual(self.col_schema.to_dict(self.col_xml_file), _COLLECTION_DICT) - - default_dict = self.col_schema.to_dict(self.col_xml_file, converter=xmlschema.XMLSchemaConverter) - self.assertEqual(default_dict, _COLLECTION_DICT) - - default_dict_root = self.col_schema.to_dict(self.col_xml_file, preserve_root=True) - self.assertEqual(default_dict_root, {'col:collection': _COLLECTION_DICT}) - - def test_visitor_converter(self): - visitor_dict = self.col_schema.to_dict(self.col_xml_file, converter=UnorderedConverter) - self.assertEqual(visitor_dict, _COLLECTION_DICT) - - visitor_dict_root = self.col_schema.to_dict( - self.col_xml_file, converter=UnorderedConverter(preserve_root=True)) - self.assertEqual(visitor_dict_root, {'col:collection': _COLLECTION_DICT}) - - def test_parker_converter(self): - parker_dict = self.col_schema.to_dict(self.col_xml_file, converter=xmlschema.ParkerConverter) - self.assertEqual(parker_dict, _COLLECTION_PARKER) - - parker_dict_root = self.col_schema.to_dict( - self.col_xml_file, converter=xmlschema.ParkerConverter(preserve_root=True), decimal_type=float) - self.assertEqual(parker_dict_root, _COLLECTION_PARKER_ROOT) - - def test_badgerfish_converter(self): - badgerfish_dict = self.col_schema.to_dict( - self.col_xml_file, 
converter=xmlschema.BadgerFishConverter, decimal_type=float) - self.assertEqual(badgerfish_dict, _COLLECTION_BADGERFISH) - - def test_abdera_converter(self): - abdera_dict = self.col_schema.to_dict( - self.col_xml_file, converter=xmlschema.AbderaConverter, decimal_type=float, dict_class=dict) - self.assertEqual(abdera_dict, _COLLECTION_ABDERA) - - def test_json_ml_converter(self): - json_ml_dict = self.col_schema.to_dict(self.col_xml_file, converter=xmlschema.JsonMLConverter) - self.assertEqual(json_ml_dict, _COLLECTION_JSON_ML) - - def test_dict_granularity(self): - """Based on Issue #22, test to make sure an xsd indicating list with - dictionaries, returns just that even when it has a single dict. """ - xsd_string = self.casepath('issues/issue_022/xsd_string.xsd') - xml_string_1 = self.casepath('issues/issue_022/xml_string_1.xml') - xml_string_2 = self.casepath('issues/issue_022/xml_string_2.xml') - xsd_schema = xmlschema.XMLSchema(xsd_string) - xml_data_1 = xsd_schema.to_dict(xml_string_1) - xml_data_2 = xsd_schema.to_dict(xml_string_2) - self.assertTrue(isinstance(xml_data_1['bar'], type(xml_data_2['bar'])), - msg="XSD with an array that return a single element from xml must still yield a list.") - - def test_any_type(self): - any_type = xmlschema.XMLSchema.meta_schema.types['anyType'] - xml_data_1 = ElementTree.Element('dummy') - self.assertEqual(any_type.decode(xml_data_1), (None, [], [])) - xml_data_2 = ElementTree.fromstring('\n \n \n') - self.assertEqual(any_type.decode(xml_data_2), (None, [], [])) # Currently no decoding yet - - def test_choice_model_decoding(self): - schema = xmlschema.XMLSchema(self.casepath('issues/issue_041/issue_041.xsd')) - data = schema.to_dict(self.casepath('issues/issue_041/issue_041.xml')) - self.assertEqual(data, { - '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', - '@xsi:noNamespaceSchemaLocation': 'issue_041.xsd', - 'Name': 'SomeNameValueThingy', - 'Value': {'Integer': 0} - }) - - def test_cdata_decoding(self): - 
schema = xmlschema.XMLSchema(self.casepath('issues/issue_046/issue_046.xsd')) - xml_file = self.casepath('issues/issue_046/issue_046.xml') - self.assertEqual( - schema.decode(xml_file, dict_class=ordered_dict_class, cdata_prefix='#'), - ordered_dict_class( - [('@xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'), - ('@xsi:noNamespaceSchemaLocation', 'issue_046.xsd'), - ('#1', 'Dear Mr.'), ('name', 'John Smith'), - ('#2', '.\n Your order'), ('orderid', 1032), - ('#3', 'will be shipped on'), ('shipdate', '2001-07-13'), ('#4', '.')] - )) - - def test_string_facets(self): - none_empty_string_type = self.st_schema.types['none_empty_string'] - self.check_decode(none_empty_string_type, '', XMLSchemaValidationError) - name_type = self.st_schema.types['NameType'] - self.check_decode(name_type, '', XMLSchemaValidationError) - - def test_binary_data_facets(self): - hex_code_type = self.st_schema.types['hexCode'] - self.check_decode(hex_code_type, u'00D7310A', u'00D7310A') - - base64_code_type = self.st_schema.types['base64Code'] - self.check_decode(base64_code_type, base64.b64encode(b'ok'), XMLSchemaValidationError) - base64_value = base64.b64encode(b'hello') - self.check_decode(base64_code_type, base64_value, base64_value.decode('utf-8')) - self.check_decode(base64_code_type, base64.b64encode(b'abcefgh'), u'YWJjZWZnaA==') - self.check_decode(base64_code_type, b' Y W J j ZWZ\t\tn\na A= =', u'Y W J j ZWZ n a A= =') - self.check_decode(base64_code_type, u' Y W J j ZWZ\t\tn\na A= =', u'Y W J j ZWZ n a A= =') - self.check_decode(base64_code_type, base64.b64encode(b'abcefghi'), u'YWJjZWZnaGk=') - - self.check_decode(base64_code_type, u'YWJjZWZnaA=', XMLSchemaValidationError) - self.check_decode(base64_code_type, u'YWJjZWZna$==', XMLSchemaValidationError) - - base64_length4_type = self.st_schema.types['base64Length4'] - self.check_decode(base64_length4_type, base64.b64encode(b'abc'), XMLSchemaValidationError) - self.check_decode(base64_length4_type, base64.b64encode(b'abce'), 
u'YWJjZQ==') - self.check_decode(base64_length4_type, base64.b64encode(b'abcef'), XMLSchemaValidationError) - - base64_length5_type = self.st_schema.types['base64Length5'] - self.check_decode(base64_length5_type, base64.b64encode(b'1234'), XMLSchemaValidationError) - self.check_decode(base64_length5_type, base64.b64encode(b'12345'), u'MTIzNDU=') - self.check_decode(base64_length5_type, base64.b64encode(b'123456'), XMLSchemaValidationError) - - def test_decimal_type(self): - schema = self.get_schema(""" - - - - - - - """) - - self.check_decode(schema, '120.48', Decimal('120.48')) - self.check_decode(schema, '100.50', Decimal('100.50'), process_namespaces=False) - self.check_decode(schema, '100.49', XMLSchemaValidationError) - self.check_decode(schema, '120.48', 120.48, decimal_type=float) - # Issue #66 - self.check_decode(schema, '120.48', '120.48', decimal_type=str) - - def test_nillable(self): - # Issue #76 - xsd_string = """ - - - - - - - - - """ - xsd_schema = xmlschema.XMLSchema(xsd_string) - xml_string_1 = "0" - xml_string_2 = """ - - - - """ - self.assertTrue(xsd_schema.is_valid(source=xml_string_1, use_defaults=False)) - self.assertTrue(xsd_schema.is_valid(source=xml_string_2, use_defaults=False)) - obj = xsd_schema.decode(xml_string_2, use_defaults=False) - self.check_etree_elements(ElementTree.fromstring(xml_string_2), xsd_schema.encode(obj)) - - def test_default_namespace(self): - # Issue #77 - xs = xmlschema.XMLSchema(""" - - - """) - self.assertEqual(xs.to_dict("""bar""", - path='/foo', namespaces={'': 'http://example.com/foo'}), 'bar') - self.assertEqual(xs.to_dict("""bar""", - path='/foo', namespaces={'': 'http://example.com/foo'}), None) - - def test_complex_with_simple_content_restriction(self): - xs = self.schema_class(self.casepath('features/derivations/complex-with-simple-content-restriction.xsd')) - self.assertTrue(xs.is_valid('10')) - self.assertFalse(xs.is_valid('alpha')) - self.assertEqual(xs.decode('10'), 10) - - def test_union_types(self): 
- # For testing issue #103 - decimal_or_nan = self.st_schema.types['myType'] - self.check_decode(decimal_or_nan, '95.0', Decimal('95.0')) - self.check_decode(decimal_or_nan, 'NaN', u'NaN') - - def test_default_values(self): - # From issue #108 - xsd_text = """ - - - - - - - - - - - - - """ - - schema = self.schema_class(xsd_text) - self.assertEqual(schema.to_dict("text"), - {'@attrWithDefault': 'default_value', - '@attrWithFixed': 'fixed_value', - '$': 'text'}) - self.assertEqual(schema.to_dict(""), - {'@attrWithDefault': 'default_value', - '@attrWithFixed': 'fixed_value', - '$': 'default_value'}) - self.assertEqual(schema.to_dict("""text"""), - {'$': 'text', - '@attr': 'attr_value', - '@attrWithDefault': 'default_value', - '@attrWithFixed': 'fixed_value'}) - - self.assertEqual(schema.to_dict("text", use_defaults=False), - {'@attrWithFixed': 'fixed_value', '$': 'text'}) - self.assertEqual(schema.to_dict("""text""", use_defaults=False), - {'$': 'text', '@attr': 'attr_value', '@attrWithFixed': 'fixed_value'}) - self.assertEqual(schema.to_dict("", use_defaults=False), {'@attrWithFixed': 'fixed_value'}) - - self.assertEqual(schema.to_dict(""), 'default_value') - self.assertIsNone(schema.to_dict("", use_defaults=False)) - - def test_validation_errors(self): - xsd_text = """ - - - - - - - - - - - - """ - - schema = self.schema_class(xsd_text) - - self.assertIsNone(schema.to_dict("alpha", validation='lax')[0]) - self.assertEqual(schema.to_dict("20"), {'@int_attr': 10, '$': 20}) - self.assertEqual(schema.to_dict("20", validation='lax')[0], - {'@int_attr': None, '$': 20}) - self.assertEqual(schema.to_dict("20", validation='skip'), - {'@int_attr': 'wrong', '$': 20}) - - def test_error_message(self): - schema = self.schema_class(os.path.join(self.test_cases_dir, 'issues/issue_115/Rotation.xsd')) - rotation_data = '' - - message_lines = [] - try: - schema.decode(rotation_data) - except Exception as err: - message_lines = unicode_type(err).split('\n') - - 
self.assertTrue(message_lines, msg="Empty error message!") - self.assertEqual(message_lines[-6], 'Instance:') - self.assertEqual(message_lines[-4].strip(), rotation_data) - self.assertEqual(message_lines[-2], 'Path: /tns:rotation') - - -class TestDecoding11(TestDecoding): - schema_class = XMLSchema11 - - def test_datetime_types(self): - xs = self.get_schema('') - self.assertEqual(xs.decode('2019-01-01T13:40:00'), '2019-01-01T13:40:00') - self.assertEqual(xs.decode('2019-01-01T13:40:00', datetime_types=True), - datatypes.DateTime.fromstring('2019-01-01T13:40:00')) - - xs = self.get_schema('') - self.assertEqual(xs.decode('2001-04-15'), '2001-04-15') - self.assertEqual(xs.decode('2001-04-15', datetime_types=True), - datatypes.Date.fromstring('2001-04-15')) - - def test_derived_duration_types(self): - xs = self.get_schema('') - self.assertEqual(xs.decode('P0Y4M'), 'P0Y4M') - self.assertEqual(xs.decode('P2Y10M', datetime_types=True), - datatypes.Duration.fromstring('P2Y10M')) - - xs = self.get_schema('') - self.assertEqual(xs.decode('P2DT6H30M30.001S'), 'P2DT6H30M30.001S') - self.assertEqual(xs.decode('P2DT26H'), 'P2DT26H') - self.assertEqual(xs.decode('P2DT6H30M30.001S', datetime_types=True), - datatypes.Duration.fromstring('P2DT6H30M30.001S')) - - def test_type_alternatives(self): - xs = self.schema_class(self.casepath('features/elements/type_alternatives-no-ns.xsd')) - self.assertTrue(xs.is_valid('10')) - self.assertFalse(xs.is_valid('10.1')) - self.assertTrue(xs.is_valid('10.1')) - self.assertFalse(xs.is_valid('alpha')) - self.assertFalse(xs.is_valid('alpha')) - self.assertTrue(xs.is_valid('0')) - self.assertTrue(xs.is_valid('true')) - - xs = self.schema_class(self.casepath('features/elements/type_alternatives.xsd')) - self.assertTrue(xs.is_valid('10')) - self.assertFalse(xs.is_valid('10.1')) - self.assertTrue(xs.is_valid('10.1')) - self.assertFalse(xs.is_valid('alpha')) - self.assertFalse(xs.is_valid('alpha')) - self.assertTrue(xs.is_valid('0')) - 
self.assertTrue(xs.is_valid('true')) - - -class TestEncoding(XMLSchemaTestCase): - - def check_encode(self, xsd_component, data, expected, **kwargs): - if isinstance(expected, type) and issubclass(expected, Exception): - self.assertRaises(expected, xsd_component.encode, data, **kwargs) - elif is_etree_element(expected): - elem = xsd_component.encode(data, **kwargs) - self.check_etree_elements(expected, elem) - else: - obj = xsd_component.encode(data, **kwargs) - if isinstance(obj, tuple) and len(obj) == 2 and isinstance(obj[1], list): - self.assertEqual(expected, obj[0]) - self.assertTrue(isinstance(obj[0], type(expected))) - elif is_etree_element(obj): - namespaces = kwargs.pop('namespaces', self.default_namespaces) - self.assertEqual(expected, etree_tostring(obj, namespaces=namespaces).strip()) - else: - self.assertEqual(expected, obj) - self.assertTrue(isinstance(obj, type(expected))) - - def test_decode_encode(self): - filename = self.casepath('examples/collection/collection.xml') - xt = ElementTree.parse(filename) - xd = self.col_schema.to_dict(filename, dict_class=ordered_dict_class) - elem = self.col_schema.encode(xd, path='./col:collection', namespaces=self.col_namespaces) - - self.assertEqual( - len([e for e in elem.iter()]), 20, - msg="The encoded tree must have 20 elements as the origin." 
- ) - self.assertTrue(all([ - local_name(e1.tag) == local_name(e2.tag) - for e1, e2 in zip(elem.iter(), xt.getroot().iter()) - ])) - - def test_builtin_string_based_types(self): - self.check_encode(self.xsd_types['string'], 'sample string ', u'sample string ') - self.check_encode(self.xsd_types['normalizedString'], ' sample string ', u' sample string ') - self.check_encode(self.xsd_types['normalizedString'], '\n\r sample\tstring\n', u' sample string ') - self.check_encode(self.xsd_types['token'], '\n\r sample\t\tstring\n ', u'sample string') - self.check_encode(self.xsd_types['language'], 'sample string', XMLSchemaValidationError) - self.check_encode(self.xsd_types['language'], ' en ', u'en') - self.check_encode(self.xsd_types['Name'], 'first_name', u'first_name') - self.check_encode(self.xsd_types['Name'], ' first_name ', u'first_name') - self.check_encode(self.xsd_types['Name'], 'first name', XMLSchemaValidationError) - self.check_encode(self.xsd_types['Name'], '1st_name', XMLSchemaValidationError) - self.check_encode(self.xsd_types['Name'], 'first_name1', u'first_name1') - self.check_encode(self.xsd_types['Name'], 'first:name', u'first:name') - self.check_encode(self.xsd_types['NCName'], 'first_name', u'first_name') - self.check_encode(self.xsd_types['NCName'], 'first:name', XMLSchemaValidationError) - self.check_encode(self.xsd_types['ENTITY'], 'first:name', XMLSchemaValidationError) - self.check_encode(self.xsd_types['ID'], 'first:name', XMLSchemaValidationError) - self.check_encode(self.xsd_types['IDREF'], 'first:name', XMLSchemaValidationError) - - def test_builtin_decimal_based_types(self): - self.check_encode(self.xsd_types['decimal'], -99.09, u'-99.09') - self.check_encode(self.xsd_types['decimal'], '-99.09', u'-99.09') - self.check_encode(self.xsd_types['integer'], 1000, u'1000') - self.check_encode(self.xsd_types['integer'], 100.0, XMLSchemaEncodeError) - self.check_encode(self.xsd_types['integer'], 100.0, u'100', validation='lax') - 
self.check_encode(self.xsd_types['short'], 1999, u'1999') - self.check_encode(self.xsd_types['short'], 10000000, XMLSchemaValidationError) - self.check_encode(self.xsd_types['float'], 100.0, u'100.0') - self.check_encode(self.xsd_types['float'], 'hello', XMLSchemaEncodeError) - self.check_encode(self.xsd_types['double'], -4531.7, u'-4531.7') - self.check_encode(self.xsd_types['positiveInteger'], -1, XMLSchemaValidationError) - self.check_encode(self.xsd_types['positiveInteger'], 0, XMLSchemaValidationError) - self.check_encode(self.xsd_types['nonNegativeInteger'], 0, u'0') - self.check_encode(self.xsd_types['nonNegativeInteger'], -1, XMLSchemaValidationError) - self.check_encode(self.xsd_types['negativeInteger'], -100, u'-100') - self.check_encode(self.xsd_types['nonPositiveInteger'], 7, XMLSchemaValidationError) - self.check_encode(self.xsd_types['unsignedLong'], 101, u'101') - self.check_encode(self.xsd_types['unsignedLong'], -101, XMLSchemaValidationError) - self.check_encode(self.xsd_types['nonPositiveInteger'], 7, XMLSchemaValidationError) - - def test_builtin_list_types(self): - self.check_encode(self.xsd_types['IDREFS'], ['first_name'], u'first_name') - self.check_encode(self.xsd_types['IDREFS'], 'first_name', u'first_name') # Transform data to list - self.check_encode(self.xsd_types['IDREFS'], ['one', 'two', 'three'], u'one two three') - self.check_encode(self.xsd_types['IDREFS'], [1, 'two', 'three'], XMLSchemaValidationError) - self.check_encode(self.xsd_types['NMTOKENS'], ['one', 'two', 'three'], u'one two three') - self.check_encode(self.xsd_types['ENTITIES'], ('mouse', 'cat', 'dog'), u'mouse cat dog') - - def test_list_types(self): - list_of_strings = self.st_schema.types['list_of_strings'] - self.check_encode(list_of_strings, (10, 25, 40), u'', validation='lax') - self.check_encode(list_of_strings, (10, 25, 40), u'10 25 40', validation='skip') - self.check_encode(list_of_strings, ['a', 'b', 'c'], u'a b c', validation='skip') - - list_of_integers = 
self.st_schema.types['list_of_integers'] - self.check_encode(list_of_integers, (10, 25, 40), u'10 25 40') - self.check_encode(list_of_integers, (10, 25.0, 40), XMLSchemaValidationError) - self.check_encode(list_of_integers, (10, 25.0, 40), u'10 25 40', validation='lax') - - list_of_floats = self.st_schema.types['list_of_floats'] - self.check_encode(list_of_floats, [10.1, 25.0, 40.0], u'10.1 25.0 40.0') - self.check_encode(list_of_floats, [10.1, 25, 40.0], u'10.1 25.0 40.0', validation='lax') - self.check_encode(list_of_floats, [10.1, False, 40.0], u'10.1 0.0 40.0', validation='lax') - - list_of_booleans = self.st_schema.types['list_of_booleans'] - self.check_encode(list_of_booleans, [True, False, True], u'true false true') - self.check_encode(list_of_booleans, [10, False, True], XMLSchemaEncodeError) - self.check_encode(list_of_booleans, [True, False, 40.0], u'true false', validation='lax') - self.check_encode(list_of_booleans, [True, False, 40.0], u'true false 40.0', validation='skip') - - def test_union_types(self): - integer_or_float = self.st_schema.types['integer_or_float'] - self.check_encode(integer_or_float, -95, u'-95') - self.check_encode(integer_or_float, -95.0, u'-95.0') - self.check_encode(integer_or_float, True, XMLSchemaEncodeError) - self.check_encode(integer_or_float, True, u'1', validation='lax') - - integer_or_string = self.st_schema.types['integer_or_string'] - self.check_encode(integer_or_string, 89, u'89') - self.check_encode(integer_or_string, 89.0, u'89', validation='lax') - self.check_encode(integer_or_string, 89.0, XMLSchemaEncodeError) - self.check_encode(integer_or_string, False, XMLSchemaEncodeError) - self.check_encode(integer_or_string, "Venice ", u'Venice ') - - boolean_or_integer_or_string = self.st_schema.types['boolean_or_integer_or_string'] - self.check_encode(boolean_or_integer_or_string, 89, u'89') - self.check_encode(boolean_or_integer_or_string, 89.0, u'89', validation='lax') - self.check_encode(boolean_or_integer_or_string, 
89.0, XMLSchemaEncodeError) - self.check_encode(boolean_or_integer_or_string, False, u'false') - self.check_encode(boolean_or_integer_or_string, "Venice ", u'Venice ') - - def test_simple_elements(self): - elem = etree_element('{ns}A') - elem.text = '89' - self.check_encode(self.get_element('A', type='string'), '89', elem) - self.check_encode(self.get_element('A', type='integer'), 89, elem) - elem.text = '-10.4' - self.check_encode(self.get_element('A', type='float'), -10.4, elem) - elem.text = 'false' - self.check_encode(self.get_element('A', type='boolean'), False, elem) - elem.text = 'true' - self.check_encode(self.get_element('A', type='boolean'), True, elem) - - self.check_encode(self.get_element('A', type='short'), 128000, XMLSchemaValidationError) - elem.text = '0' - self.check_encode(self.get_element('A', type='nonNegativeInteger'), 0, elem) - self.check_encode(self.get_element('A', type='nonNegativeInteger'), '0', XMLSchemaValidationError) - self.check_encode(self.get_element('A', type='positiveInteger'), 0, XMLSchemaValidationError) - elem.text = '-1' - self.check_encode(self.get_element('A', type='negativeInteger'), -1, elem) - self.check_encode(self.get_element('A', type='nonNegativeInteger'), -1, XMLSchemaValidationError) - - def test_complex_elements(self): - schema = self.get_schema(""" - - - - - - - - - - """) - self.check_encode( - schema.elements['A'], data={'@a1': 10, '@a2': -1, '$': 'simple '}, - expected='simple ', - ) - self.check_encode( - schema.elements['A'], {'@a1': 10, '@a2': -1, '$': 'simple '}, - ElementTree.fromstring('simple '), - ) - self.check_encode( - schema.elements['A'], {'@a1': 10, '@a2': -1}, - ElementTree.fromstring('') - ) - self.check_encode( - schema.elements['A'], {'@a1': 10, '$': 'simple '}, - ElementTree.fromstring('simple ') - ) - self.check_encode(schema.elements['A'], {'@a2': -1, '$': 'simple '}, XMLSchemaValidationError) - - schema = self.get_schema(""" - - - - - - - - - """) - self.check_encode( - 
xsd_component=schema.elements['A'], - data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('B3', False)]), - expected=u'\nabc\n10\nfalse\n', - indent=0, - ) - self.check_encode(schema.elements['A'], {'B1': 'abc', 'B2': 10, 'B4': False}, XMLSchemaValidationError) - - converter_cls = getattr(self.schema_class, "converter", None) - if converter_cls and issubclass(converter_cls, UnorderedConverter): - # UnorderedConverter doesn't use ordered content which makes - # it incompatible with cdata. - self.check_encode( - xsd_component=schema.elements['A'], - data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello'), ('B3', True)]), - expected=XMLSchemaValueError, - indent=0, cdata_prefix='#' - ) - else: - self.check_encode( - xsd_component=schema.elements['A'], - data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello'), ('B3', True)]), - expected=u'\nabc\n10\nhello\ntrue\n', - indent=0, cdata_prefix='#' - ) - self.check_encode( - xsd_component=schema.elements['A'], - data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello')]), - expected=XMLSchemaValidationError, indent=0, cdata_prefix='#' - ) - - def test_encode_unordered_content(self): - schema = self.get_schema(""" - - - - - - - - - """) - converter_cls = getattr(self.schema_class, "converter", None) - if converter_cls and issubclass(converter_cls, UnorderedConverter): - expected = u'\nabc\n10\ntrue\n' - else: - expected = XMLSchemaChildrenValidationError - - self.check_encode( - xsd_component=schema.elements['A'], - data=ordered_dict_class([('B2', 10), ('B1', 'abc'), ('B3', True)]), - expected=expected, - indent=0, cdata_prefix='#' - ) - - def test_encode_datetime(self): - xs = self.get_schema('') - - dt = xs.decode('2019-01-01T13:40:00', datetime_types=True) - self.assertEqual( - etree_tostring(xs.encode(dt)), - '2019-01-01T13:40:00' - ) - - def test_encode_date(self): - xs = self.get_schema('') - date = xs.decode('2001-04-15', datetime_types=True) - self.assertEqual( - 
etree_tostring(xs.encode(date)), - '2001-04-15' - ) - - def test_duration(self): - xs = self.get_schema('') - duration = xs.decode('P5Y3MT60H30.001S', datetime_types=True) - self.assertEqual( - etree_tostring(xs.encode(duration)), - 'P5Y3M2DT12H30.001S' - ) - - def test_gregorian_year(self): - xs = self.get_schema('') - gyear = xs.decode('2000', datetime_types=True) - self.assertEqual( - etree_tostring(xs.encode(gyear)), - '2000' - ) - - def test_gregorian_yearmonth(self): - xs = self.get_schema('') - gyear_month = xs.decode('2000-12', datetime_types=True) - self.assertEqual( - etree_tostring(xs.encode(gyear_month)), - '2000-12' - ) - - def test_error_message(self): - schema = self.schema_class(os.path.join(self.test_cases_dir, 'issues/issue_115/Rotation.xsd')) - rotation_data = { - "@roll": 0.0, - "@pitch": 0.0, - "@yaw": -1.0 # <----- invalid value, must be between 0 and 360 - } - - message_lines = [] - try: - schema.encode(rotation_data) - except Exception as err: - message_lines = unicode_type(err).split('\n') - - self.assertTrue(message_lines, msg="Empty error message!") - self.assertEqual(message_lines[-4], 'Instance:') - if sys.version_info < (3, 8): - text = '' - else: - text = '' - self.assertEqual(message_lines[-2].strip(), text) - - def test_strict_trailing_content(self): - """Too many elements for a group raises an exception.""" - schema = self.get_schema(""" - - - - - - - - """) - self.check_encode( - schema.elements['foo'], - data={"A": [1, 2, 3]}, - expected=XMLSchemaChildrenValidationError, - ) - - -class TestEncoding11(TestEncoding): - schema_class = XMLSchema11 - - -class XMLSchemaUnorderedConverter(xmlschema.XMLSchema): - converter = UnorderedConverter - - -class TestEncodingUnorderedConverter10(TestEncoding): - schema_class = XMLSchemaUnorderedConverter - - def test_visitor_converter_repeated_sequence_of_elements(self): - schema = self.get_schema(""" - - - - - - - - - """) - tree = schema.to_etree( - {"A": [1, 2], "B": [3, 4]}, - ) - vals = [] - 
for elem in tree: - vals.append(elem.text) - self.assertEqual(vals, ['1', '3', '2', '4']) - - -class XMLSchema11UnorderedConverter(XMLSchema11): - converter = UnorderedConverter - - -class TestEncodingUnorderedConverter11(TestEncoding): - schema_class = XMLSchema11UnorderedConverter - - -# Creates decoding/encoding tests classes from XML files -globals().update(tests_factory(make_validator_test_class, 'xml')) - - if __name__ == '__main__': + import unittest + import os + from xmlschema.tests import print_test_header + from xmlschema.tests.test_factory import tests_factory, make_validator_test_class + + def load_tests(loader, tests, pattern): + validation_dir = os.path.join(os.path.dirname(__file__), 'validation') + validation_tests = loader.discover(start_dir=validation_dir, pattern=pattern or '*') + tests.addTests(validation_tests) + return tests + + # Creates schema tests from XML files + globals().update(tests_factory(make_validator_test_class, 'xml')) print_test_header() unittest.main() diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index ddf8e18..dbeb25c 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -11,60 +11,154 @@ # """ This module runs tests concerning the W3C XML Schema 1.1 test suite. +Execute this module as script to run the tests. For default all the +schema tests are built and run. To operate a different selection you +can provide the following options: + + --xml: run also XML instance tests + --xsd10: run only XSD 1.0 tests + --xsd11: run only XSD 1.1 tests + --valid: run only tests set as valid + --invalid: run only tests set as invalid + +Additionally you can provide an unlimited list of positive integers to +run only the tests associated with a progressive list of index. +Also the unittest options are accepted (run with --help to show a summary +of available options). 
""" from __future__ import print_function, unicode_literals import unittest +import argparse import os.path import xml.etree.ElementTree as ElementTree +import sys +import warnings -import xmlschema -from xmlschema import XMLSchemaException +from xmlschema import validate, XMLSchema10, XMLSchema11, XMLSchemaException +from xmlschema.tests import print_test_header TEST_SUITE_NAMESPACE = "http://www.w3.org/XML/2004/xml-schema-test-suite/" XLINK_NAMESPACE = "http://www.w3.org/1999/xlink" +XSD_VERSION_VALUES = {'1.0 1.1', '1.0', '1.1'} ADMITTED_VALIDITY = {'valid', 'invalid', 'indeterminate'} #### # Tests that are incompatible with XSD meta-schema validation or that are postponed SKIPPED_TESTS = { # Signed as valid that have to be checked - '../msData/additional/addB194.xsd', # 4826: invalid xml:lang='enu' - '../msData/particles/particlesZ001.xsd', # 10957: Invalid in XSD 1.0 - '../msData/simpleType/stE110.xsd', # 13892: Circular xs:union declaration - '../saxonData/Missing/missing001.xsd', # 14405: missing type (this may be valid in 'lax' mode?) - '../saxonData/Missing/missing002.xsd', # 14406: missing substitution group - '../saxonData/Missing/missing003.xsd', # 14406: missing type and substitution group - '../saxonData/Missing/missing006.xsd', # 14410: missing list item type - '../saxonData/VC/vc001.xsd', # 14411: VC namespace required - '../saxonData/VC/vc002.xsd', # 14412: VC namespace required - '../saxonData/VC/vc014.xsd', # 14413: VC namespace required - '../saxonData/VC/vc024.xsd', # 14414: VC 1.1? required - '../saxonData/XmlVersions/xv004.xsd', # 14419: non-BMP chars allowed in names in XML 1.1+ + '../msData/additional/addB194.xsd', # invalid xml:lang='enu' + '../msData/particles/particlesZ001.xsd', # Invalid in XSD 1.0 + '../msData/simpleType/stE110.xsd', # Circular xs:union declaration + '../saxonData/Missing/missing001.xsd', # missing type (this may be valid in 'lax' mode?) 
+ '../saxonData/Missing/missing002.xsd', # missing substitution group + '../saxonData/Missing/missing003.xsd', # missing type and substitution group + '../saxonData/Missing/missing006.xsd', # missing list item type + '../saxonData/VC/vc001.xsd', # VC namespace required + '../saxonData/VC/vc002.xsd', # VC namespace required + '../saxonData/VC/vc014.xsd', # VC namespace required + '../saxonData/VC/vc024.xsd', # VC 1.1? required + '../saxonData/XmlVersions/xv004.xsd', # non-BMP chars allowed in names in XML 1.1+ + + # Signed as valid that depends by implementation choice + '../saxonData/Assert/assert-simple007.xsd', # XPath [err:FOCA0002] invalid lexical value + + # Signed as valid but not implemented yet + '../saxonData/Assert/assert011.xsd', # TODO: XPath 2 doc() function in elementpath # Invalid that may be valid - '../sunData/combined/xsd003b/xsd003b.e.xsd', # 3981: Redefinition that may be valid - '../msData/additional/adhocAddC002.xsd', # 4642: Lack of the processor on XML namespace knowledge - '../msData/additional/test65026.xsd', # 4712: Lack of the processor on XML namespace knowledge - '../msData/annotations/annotF001.xsd', # 4989: Annotation contains xml:lang="" ?? 
(but xml.xsd allows '') - '../msData/datatypes/Facets/base64Binary/base64Binary_enumeration003.xsd', # 7277: check base64 invalid values - '../msData/datatypes/Facets/anyURI/anyURI_a001.xsd', # 7292: XSD 1.0 limited URI (see RFC 2396 + RFC 2732) - '../msData/datatypes/Facets/anyURI/anyURI_a003.xsd', # 7294: XSD 1.0 limited URI (see RFC 2396 + RFC 2732) - '../msData/datatypes/Facets/anyURI/anyURI_b004.xsd', # 7310: XSD 1.0 limited URI (see RFC 2396 + RFC 2732) - '../msData/datatypes/Facets/anyURI/anyURI_b006.xsd', # 7312: XSD 1.0 limited URI (see RFC 2396 + RFC 2732) - '../msData/element/elemZ026.xsd', # 8541: This is good because the head element is abstract - '../msData/element/elemZ031.xsd', # 8557: Valid in Python that has arbitrary large integers - '../msData/errata10/errC005.xsd', # 8558: Typo: abstract attribute must be set to "true" to fail - '../msData/group/groupH021.xsd', # 8679: TODO: wrong in XSD 1.0, good in XSD 1.1 - '../msData/identityConstraint/idC019.xsd', # 8936: TODO: is it an error? - '../msData/identityConstraint/idI148.xsd', # 9291: FIXME attribute::* in a selector (restrict XPath parser) - '../msData/identityConstraint/idJ016.xsd', # 9311: FIXME xpath="xpns: *" not allowed?? - '../msData/modelGroups/mgE006.xsd', # 9712: Is valid (is mg007.xsd invalid for the same reason) + '../msData/additional/adhocAddC002.xsd', # Lack of the processor on XML namespace knowledge + '../msData/additional/test65026.xsd', # Lack of the processor on XML namespace knowledge + '../msData/annotations/annotF001.xsd', # Annotation contains xml:lang="" ?? 
(but xml.xsd allows '') + '../msData/datatypes/Facets/base64Binary/base64Binary_enumeration003.xsd', # check base64 invalid values + '../msData/datatypes/Facets/anyURI/anyURI_a001.xsd', # XSD 1.0 limited URI (see RFC 2396 + RFC 2732) + '../msData/datatypes/Facets/anyURI/anyURI_a003.xsd', # XSD 1.0 limited URI (see RFC 2396 + RFC 2732) + '../msData/datatypes/Facets/anyURI/anyURI_b004.xsd', # XSD 1.0 limited URI (see RFC 2396 + RFC 2732) + '../msData/datatypes/Facets/anyURI/anyURI_b006.xsd', # XSD 1.0 limited URI (see RFC 2396 + RFC 2732) + '../msData/element/elemZ026.xsd', # This is good because the head element is abstract + '../msData/element/elemZ031.xsd', # Valid in Python that has arbitrary large integers + '../msData/group/groupH021.xsd', # TODO: wrong in XSD 1.0, good in XSD 1.1 + '../msData/identityConstraint/idC019.xsd', # TODO: is it an error? + '../msData/identityConstraint/idI148.xsd', # FIXME attribute::* in a selector (restrict XPath parser) + '../msData/modelGroups/mgE006.xsd', # Is valid? (is mg007.xsd invalid for the same reason) + '../msData/particles/particlesV020.xsd', # 10942: see http://www.w3.org/Bugs/Public/show_bug.cgi?id=4147 - # Invalid that are valid because depend by implementation choices - '../msData/schema/schG6_a.xsd', # 13639: Schema is valid because the ns import is done once, validation fails. - '../msData/schema/schG11_a.xsd', # 13544: Schema is valid because the ns import is done once, validation fails. + # Invalid that maybe valid because depends by implementation choices + '../msData/schema/schG6_a.xsd', # Schema is valid because the ns import is done once, validation fails. + '../msData/schema/schG11_a.xsd', # Schema is valid because the ns import is done once, validation fails. 
+ + # Indeterminate that depends by implementation choices + '../msData/particles/particlesZ026a.xsd', + '../msData/schema/schG14a.xsd', + '../msData/schema/schU3_a.xsd', # Circular redefines + '../msData/schema/schU4_a.xsd', # Circular redefines + '../msData/schema/schU5_a.xsd', # Circular redefines + '../msData/schema/schZ012_a.xsd', # Comparison of file urls to be case sensitive or not + '../msData/schema/schZ015.xsd', # schemaLocation="" + + # Invalid XML tests + '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078 + '../msData/additional/test93490_8.xml', # 4799: Idem + + # Skip for missing XML version 1.1 implementation + '../saxonData/XmlVersions/xv001.v01.xml', # 14850 + '../saxonData/XmlVersions/xv003.v01.xml', # 14852 + '../saxonData/XmlVersions/xv005.v01.xml', # 14854 + '../saxonData/XmlVersions/xv006.v01.xml', # 14855: invalid character  (valid in XML 1.1) + '../saxonData/XmlVersions/xv006.n02.xml', # 14855: invalid character 𐀀 (valid in XML 1.1) + '../saxonData/XmlVersions/xv008.v01.xml', # 14857 + '../saxonData/XmlVersions/xv008.n01.xml', # 14857 + + # Skip for TODO + '../sunData/combined/005/test.1.v.xml', # 3959: is valid but needs equality operators (#cos-ct-derived-ok) } +XSD11_SKIPPED_TESTS = { + # Invalid that may be valid + '../msData/regex/reK86.xsd', # \P{Is} is valid in regex for XSD 1.1 + '../msData/regex/reK87.xsd', # \P{Is} is valid in regex for XSD 1.1 + '../msData/particles/particlesHb009.xsd', # valid in XSD 1.1 + '../msData/particles/particlesZ033_g.xsd', # valid in XSD 1.1 (signed invalid for engine limitation) + '../saxonData/Override/over026.bad.xsd', # Same as over003.xsd, that is signed as valid. + '../saxonData/CTA/cta0043.xsd', # Only a warning for type table difference on restriction + '../saxonData/Wild/wild069.xsd', # Maybe inverted? 
+ + # TODO: schema tests + '../saxonData/CTA/cta9005err.xsd', # 14549: Type alternative using an inherited attribute + '../saxonData/CTA/cta9008err.xsd', # 14552: Type alternative using an inherited attribute +} + +# Total files counters +total_xsd_files = 0 +total_xml_files = 0 + + +def extract_additional_arguments(): + """ + Get and expunge additional simple arguments from sys.argv. These arguments + are not parsed with argparse but are checked and removed from sys.argv in + order to avoid errors from argument parsing at unittest level. + """ + try: + return argparse.Namespace( + xml='--xml' in sys.argv, + version='1.0' if '--xsd10' in sys.argv else '1.1' if '--xsd11' in sys.argv else '1.0 1.1', + expected=('valid',) if '--valid' in sys.argv else ('invalid',) if '--invalid' in sys.argv + else ('indeterminate',) if '--unknown' in sys.argv else ADMITTED_VALIDITY, + verbose='-v' in sys.argv or '--verbose' in sys.argv, + numbers=[int(sys.argv[k]) for k in range(len(sys.argv)) + if sys.argv[k].isdigit() and sys.argv[k] != '0' and k and sys.argv[k - 1] != '-k'] + ) + finally: + sys.argv = [ + sys.argv[k] for k in range(len(sys.argv)) + if sys.argv[k] not in { + '--xml', '--xsd10', '--xsd11', '--valid', '--invalid', '--unknown' + } and (not sys.argv[k].isdigit() or sys.argv[k] == '0' or not k or sys.argv[k - 1] == '-k') + ] + + +args = extract_additional_arguments() + def fetch_xsd_test_suite(): parent = os.path.dirname @@ -78,77 +172,219 @@ def fetch_xsd_test_suite(): raise FileNotFoundError("can't find the XSD suite index file suite.xml ...") -def create_w3c_test_group_case(filename, group_elem, group_number, xsd_version='1.0'): +def create_w3c_test_group_case(filename, group_elem, group_num, xsd_version='1.0'): """ Creates a test class for a W3C test group. :param filename: the filename of the testSet that owns the testGroup. :param group_elem: the Element instance of the test group. - :param group_number: a positive integer to distinguish and order test groups. 
+ :param group_num: a positive integer to distinguish and order test groups. :param xsd_version: if '1.1' uses XSD 1.1 validator class, otherwise uses the XSD 1.0 validator. """ - name = group_elem.attrib['name'] + def get_test_conf(elem): + schema_test = elem.tag.endswith('schemaTest') + if schema_test: + tag = '{%s}schemaDocument' % TEST_SUITE_NAMESPACE + else: + tag = '{%s}instanceDocument' % TEST_SUITE_NAMESPACE - if xsd_version == '1.1': - schema_class = xmlschema.validators.XMLSchema11 - if group_elem.get('version') == '1.0': - raise ValueError("testGroup %r is not suited for XSD 1.1" % name) - elif group_elem.get('version') == '1.1': - pass # raise ValueError("testGroup %r is not suited for XSD 1.0" % name) - else: - schema_class = xmlschema.XMLSchema + try: + source_href = elem.find(tag).get('{%s}href' % XLINK_NAMESPACE) + except AttributeError: + return + else: + if not schema_test and source_href.endswith('.testSet'): + return + if source_href in SKIPPED_TESTS: + if args.numbers: + if source_href.endswith('.xsd'): + print("Skip test number %d ..." % testgroup_num) + else: + print("Skip file %r for test number %d ..." % (source_href, testgroup_num)) + return - schema_elem = group_elem.find('{%s}schemaTest' % TEST_SUITE_NAMESPACE) - if schema_elem is not None: - schema_document = schema_elem.find('{%s}schemaDocument' % TEST_SUITE_NAMESPACE) - schema_path = schema_document.get('{%s}href' % XLINK_NAMESPACE) - if schema_path in SKIPPED_TESTS: + # Normalize and check file path + source_path = os.path.normpath(os.path.join(os.path.dirname(filename), source_href)) + if not os.path.isfile(source_path): + print("ERROR: file %r not found!" % source_path) return - schema_path = os.path.normpath(os.path.join(os.path.dirname(filename), schema_path)) + test_conf = {} - if not os.path.isfile(schema_path): - raise ValueError("Schema file %r not found!" 
% schema_path) + for version in xsd_version.split(): + if 'version' in elem.attrib and version not in elem.attrib['version']: + continue + elif version not in args.version: + continue + elif version == '1.1' and source_href in XSD11_SKIPPED_TESTS: + continue - expected = elem = None - for elem in schema_elem.findall('{%s}expected' % TEST_SUITE_NAMESPACE): - if 'version' not in elem.attrib: - expected = elem.attrib['validity'] - elif elem.attrib['version'] in (xsd_version, 'full-xpath-in-CTA'): - expected = elem.attrib['validity'] - break + for e in elem.findall('{%s}expected' % TEST_SUITE_NAMESPACE): + if 'version' not in e.attrib: + test_conf[version] = e.attrib['validity'] + elif e.attrib['version'] == version or \ + e.attrib['version'] == 'full-xpath-in-CTA': + test_conf[version] = e.attrib['validity'] + break - if expected is None: - raise ValueError("Missing expected validity for XSD %s" % xsd_version) - elif expected not in ADMITTED_VALIDITY: - raise ValueError("Wrong validity=%r attribute for %r" % (expected, elem)) + if version not in test_conf: + msg = "ERROR: Missing expected validity for XSD version %s in %r of test group %r" + print(msg % (version, elem, name)) + return + elif test_conf[version] not in ADMITTED_VALIDITY: + msg = "ERROR: Wrong validity=%r attribute for XSD version %s in %r test group %r" + print(msg % (test_conf[version], version, elem, name)) + return + elif test_conf[version] not in args.expected: + test_conf.pop(version) + elif test_conf[version] == 'indeterminate': + if args.verbose: + print("WARNING: Skip indeterminate test group %r" % name) + test_conf.pop(version) - else: - schema_path = expected = None + if test_conf: + test_conf['source'] = source_path + if schema_test and not source_path.endswith('.xml'): + test_conf['sources'] = [ + os.path.normpath( + os.path.join(os.path.dirname(filename), schema_href.get('{%s}href' % XLINK_NAMESPACE)) + ) + for schema_href in elem.findall(tag) + ] + return test_conf - if expected == 
'invalid': - class TestGroupCase(unittest.TestCase): - def test_invalid_schema(self): - with self.assertRaises(XMLSchemaException, msg="Schema %r may be invalid" % schema_path) as _: - schema_class(schema_path, use_meta=False) + if group_num == 1: + return # Skip introspection tests that have several failures due to schema mismatch. + elif args.numbers and group_num not in args.numbers: + return - elif expected == 'valid': - class TestGroupCase(unittest.TestCase): - @classmethod - def setUpClass(cls): - try: - cls.schema = schema_class(schema_path, use_meta=False) if schema_path else None - except TypeError: - cls.schema = None + name = group_elem.attrib['name'] + group_tests = [] + global total_xsd_files + global total_xml_files - def test_valid_schema(self): - if schema_path: - self.assertIsInstance(schema_class(schema_path, use_meta=False), schema_class) - else: - return # expected is None or 'indeterminate' + # Get schema/instance path + for k, child in enumerate(group_elem.iterfind('{%s}schemaTest' % TEST_SUITE_NAMESPACE)): + if k: + print("ERROR: multiple schemaTest definition in group %r" % name) + return + config = get_test_conf(child) + if not config: + return + group_tests.append(config) + total_xsd_files += 1 + + if args.xml: + for child in group_elem.iterfind('{%s}instanceTest' % TEST_SUITE_NAMESPACE): + if 'version' in child.attrib and child.attrib['version'] not in args.version: + continue + config = get_test_conf(child) + if config: + group_tests.append(config) + total_xml_files += 1 + + if not group_tests: + if len(args.expected) > 1 and args.xml: + print("ERROR: Missing both schemaTest and instanceTest in test group %r" % name) + return + + class TestGroupCase(unittest.TestCase): + + @unittest.skipIf(group_tests[0]['source'].endswith('.xml'), 'No schema test') + def test_xsd_schema(self): + for item in filter(lambda x: x['source'].endswith('.xsd'), group_tests): + source = item['source'] + rel_path = os.path.relpath(source) + + for version, 
expected in sorted(filter(lambda x: not x[0].startswith('source'), item.items())): + schema_class = XMLSchema11 if version == '1.1' else XMLSchema10 + if expected == 'invalid': + message = "schema %s should be invalid with XSD %s" % (rel_path, version) + with self.assertRaises(XMLSchemaException, msg=message): + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + if len(item['sources']) <= 1: + schema_class(source, use_meta=False) + else: + schema = schema_class(source, use_meta=False, build=False) + for other in item['sources'][1:]: + schema_class(other, global_maps=schema.maps, build=False) + schema.build() + else: + try: + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + if len(item['sources']) <= 1: + schema = schema_class(source, use_meta=False) + else: + schema = schema_class(source, use_meta=False, build=False) + for other in item['sources'][1:]: + schema_class(other, global_maps=schema.maps, build=False) + schema.build() + except XMLSchemaException as err: + schema = None + message = "schema %s should be valid with XSD %s, but an error is raised:" \ + "\n\n%s" % (rel_path, version, str(err)) + else: + message = None + + self.assertIsInstance(schema, schema_class, msg=message) + + @unittest.skipIf(group_tests[0]['source'].endswith('.xsd') and len(group_tests) == 1, 'No instance tests') + def test_xml_instances(self): + if group_tests[0]['source'].endswith('.xsd'): + schema = group_tests[0]['source'] + schemas = group_tests[0]['sources'] + else: + schema = None + schemas = [] + + for item in filter(lambda x: not x['source'].endswith('.xsd'), group_tests): + source = item['source'] + rel_path = os.path.relpath(source) + + for version, expected in sorted(filter(lambda x: x[0] != 'source', item.items())): + schema_class = XMLSchema11 if version == '1.1' else XMLSchema10 + if expected == 'invalid': + message = "instance %s should be invalid with XSD %s" % (rel_path, version) + with self.assertRaises((XMLSchemaException, 
ElementTree.ParseError), msg=message): + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + if len(schemas) <= 1: + validate(source, schema=schema, cls=schema_class) + else: + xs = schema_class(schemas[0], use_meta=False, build=False) + for other in schemas[1:]: + schema_class(other, global_maps=xs.maps, build=False) + xs.build() + xs.validate(source) + else: + try: + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + if len(schemas) <= 1: + validate(source, schema=schema, cls=schema_class) + else: + xs = schema_class(schemas[0], use_meta=False, build=False) + for other in schemas[1:]: + schema_class(other, global_maps=xs.maps, build=False) + xs.build() + xs.validate(source) + + except (XMLSchemaException, ElementTree.ParseError) as err: + error = "instance %s should be valid with XSD %s, but an error " \ + "is raised:\n\n%s" % (rel_path, version, str(err)) + else: + error = None + self.assertIsNone(error) + + if not any(g['source'].endswith('.xsd') for g in group_tests): + del TestGroupCase.test_xsd_schema + if not any(g['source'].endswith('.xml') for g in group_tests): + del TestGroupCase.test_xml_instances TestGroupCase.__name__ = TestGroupCase.__qualname__ = str( - 'TestGroupCase{0:05}_{1}'.format(group_number, name.replace('-', '_')) + 'TestGroupCase{0:05}_{1}'.format(group_num, name.replace('-', '_')) ) return TestGroupCase @@ -158,30 +394,66 @@ if __name__ == '__main__': index_dir = os.path.dirname(index_path) suite_xml = ElementTree.parse(index_path) - HREF_ATTRIBUTE = "{%s}href" % XLINK_NAMESPACE test_classes = {} - testgroup_num = 1 + testgroup_num = 0 + + print_test_header() + + if args.verbose: + print("\n>>>>> ADD TEST GROUPS FROM TESTSET FILES <<<<<\n") for testset_elem in suite_xml.iter("{%s}testSetRef" % TEST_SUITE_NAMESPACE): - testset_file = os.path.join(index_dir, testset_elem.attrib.get(HREF_ATTRIBUTE, '')) + href_attr = testset_elem.attrib.get("{%s}href" % XLINK_NAMESPACE, '') + testset_file = 
os.path.join(index_dir, href_attr) + testset_groups = 0 - testset_xml = ElementTree.parse(testset_file) - testset_version = testset_xml.getroot().get('version') - if testset_version is not None and '1.0' not in testset_version: + testset = ElementTree.parse(testset_file) + testset_version = testset.getroot().get('version', '1.0 1.1') + if testset_version not in XSD_VERSION_VALUES: + print("Testset file %r has an invalid version=%r, skip ..." % (href_attr, testset_version)) continue - # print("*** {} ***".format(testset_file)) + for testgroup_elem in testset.iter("{%s}testGroup" % TEST_SUITE_NAMESPACE): + testgroup_num += 1 - for testgroup_elem in testset_xml.iter("{%s}testGroup" % TEST_SUITE_NAMESPACE): - if testgroup_elem.get('version') == '1.1': + testgroup_version = testgroup_elem.get('version', testset_version) + if testgroup_version == 'full-xpath-in-CTA': + # skip full XPath test for the moment ... + if args.verbose: + print("Skip full XPath test %r ..." % testgroup_elem.get('name')) continue + elif testgroup_version not in XSD_VERSION_VALUES: + _msg = "Test group %r has an invalid version=%r, skip ..." 
+ print(_msg % (testgroup_elem.get('name'), testgroup_version)) + continue + elif testgroup_version not in testset_version: + if args.verbose: + _msg = "Warning: Test group %r version=%r is not included in test set version=%r" + print(_msg % (testgroup_elem.get('name'), testgroup_version, testset_version)) - cls = create_w3c_test_group_case(testset_file, testgroup_elem, testgroup_num) + cls = create_w3c_test_group_case( + filename=testset_file, + group_elem=testgroup_elem, + group_num=testgroup_num, + xsd_version=testgroup_version, + ) if cls is not None: test_classes[cls.__name__] = cls - testgroup_num += 1 + testset_groups += 1 + + if args.verbose and testset_groups: + print("Added {} test groups from {}".format(testset_groups, href_attr)) globals().update(test_classes) - # print_test_header() + if test_classes: + print("\n+++ Number of classes under test: %d +++" % len(test_classes)) + if total_xml_files: + print("+++ Number of XSD schemas under test: %d +++" % total_xsd_files) + print("+++ Number of XML files under test: %d +++" % total_xml_files) + print() + + if args.verbose: + print("\n>>>>> RUN TEST GROUPS <<<<<\n") + unittest.main() diff --git a/xmlschema/tests/test_xpath.py b/xmlschema/tests/test_xpath.py index f3f0d2e..1a99781 100644 --- a/xmlschema/tests/test_xpath.py +++ b/xmlschema/tests/test_xpath.py @@ -18,15 +18,15 @@ import xml.etree.ElementTree as ElementTree from elementpath import XPath1Parser, Selector, ElementPathSyntaxError from xmlschema import XMLSchema -from xmlschema.tests import XMLSchemaTestCase +from xmlschema.tests import casepath -class XsdXPathTest(XMLSchemaTestCase): +class XsdXPathTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls.xs1 = XMLSchema(cls.casepath("examples/vehicles/vehicles.xsd")) - cls.xs2 = XMLSchema(cls.casepath("examples/collection/collection.xsd")) + cls.xs1 = XMLSchema(casepath("examples/vehicles/vehicles.xsd")) + cls.xs2 = XMLSchema(casepath("examples/collection/collection.xsd")) cls.cars = 
cls.xs1.elements['vehicles'].type.content_type[0] cls.bikes = cls.xs1.elements['vehicles'].type.content_type[1] @@ -45,43 +45,43 @@ class XsdXPathTest(XMLSchemaTestCase): self.assertTrue(self.xs1.findall('.')) self.assertTrue(isinstance(self.xs1.find('.'), XMLSchema)) self.assertTrue(sorted(self.xs1.findall("*"), key=lambda x: x.name) == elements) - self.assertTrue(self.xs1.findall("*") == self.xs1.findall("./*")) - self.assertTrue(self.xs1.find("./vh:bikes") == self.xs1.elements['bikes']) - self.assertTrue(self.xs1.find("./vh:vehicles/vh:cars").name == self.xs1.elements['cars'].name) - self.assertFalse(self.xs1.find("./vh:vehicles/vh:cars") == self.xs1.elements['cars']) - self.assertFalse(self.xs1.find("/vh:vehicles/vh:cars") == self.xs1.elements['cars']) - self.assertTrue(self.xs1.find("vh:vehicles/vh:cars/..") == self.xs1.elements['vehicles']) - self.assertTrue(self.xs1.find("vh:vehicles/*/..") == self.xs1.elements['vehicles']) - self.assertTrue(self.xs1.find("vh:vehicles/vh:cars/../vh:cars") == self.xs1.find("vh:vehicles/vh:cars")) + self.assertListEqual(self.xs1.findall("*"), self.xs1.findall("./*")) + self.assertEqual(self.xs1.find("./vh:bikes"), self.xs1.elements['bikes']) + self.assertEqual(self.xs1.find("./vh:vehicles/vh:cars").name, self.xs1.elements['cars'].name) + self.assertNotEqual(self.xs1.find("./vh:vehicles/vh:cars"), self.xs1.elements['cars']) + self.assertNotEqual(self.xs1.find("/vh:vehicles/vh:cars"), self.xs1.elements['cars']) + self.assertEqual(self.xs1.find("vh:vehicles/vh:cars/.."), self.xs1.elements['vehicles']) + self.assertEqual(self.xs1.find("vh:vehicles/*/.."), self.xs1.elements['vehicles']) + self.assertEqual(self.xs1.find("vh:vehicles/vh:cars/../vh:cars"), self.xs1.find("vh:vehicles/vh:cars")) def test_xpath_axis(self): - self.assertTrue(self.xs1.find("vh:vehicles/child::vh:cars/..") == self.xs1.elements['vehicles']) + self.assertEqual(self.xs1.find("vh:vehicles/child::vh:cars/.."), self.xs1.elements['vehicles']) def 
test_xpath_subscription(self): - self.assertTrue(len(self.xs1.findall("./vh:vehicles/*")) == 2) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[2]") == [self.bikes]) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[3]") == []) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[last()-1]") == [self.cars]) - self.assertTrue(self.xs1.findall("./vh:vehicles/*[position()=last()]") == [self.bikes]) + self.assertEqual(len(self.xs1.findall("./vh:vehicles/*")), 2) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[2]"), [self.bikes]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[3]"), []) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[last()-1]"), [self.cars]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*[position()=last()]"), [self.bikes]) def test_xpath_group(self): - self.assertTrue(self.xs1.findall("/(vh:vehicles/*/*)") == self.xs1.findall("/vh:vehicles/*/*")) - self.assertTrue(self.xs1.findall("/(vh:vehicles/*/*)[1]") == self.xs1.findall("/vh:vehicles/*/*[1]")) + self.assertEqual(self.xs1.findall("/(vh:vehicles/*/*)"), self.xs1.findall("/vh:vehicles/*/*")) + self.assertEqual(self.xs1.findall("/(vh:vehicles/*/*)[1]"), self.xs1.findall("/vh:vehicles/*/*[1]")[:1]) def test_xpath_predicate(self): car = self.xs1.elements['cars'].type.content_type[0] - self.assertTrue(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]") == [car]) - self.assertTrue(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]") == [car]) - self.assertTrue(self.xs1.findall("./vh:vehicles/vh:cars['ciao']") == [self.cars]) - self.assertTrue(self.xs1.findall("./vh:vehicles/*['']") == []) + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]"), [car]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars/vh:car[@make]"), [car]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/vh:cars['ciao']"), [self.cars]) + self.assertListEqual(self.xs1.findall("./vh:vehicles/*['']"), []) def test_xpath_descendants(self): selector = 
Selector('.//xs:element', self.xs2.namespaces, parser=XPath1Parser) elements = list(selector.iter_select(self.xs2.root)) - self.assertTrue(len(elements) == 14) + self.assertEqual(len(elements), 14) selector = Selector('.//xs:element|.//xs:attribute|.//xs:keyref', self.xs2.namespaces, parser=XPath1Parser) elements = list(selector.iter_select(self.xs2.root)) - self.assertTrue(len(elements) == 17) + self.assertEqual(len(elements), 17) def test_xpath_issues(self): namespaces = {'ps': "http://schemas.microsoft.com/powershell/2004/04"} diff --git a/xmlschema/tests/validation/__init__.py b/xmlschema/tests/validation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/xmlschema/tests/validation/test_decoding.py b/xmlschema/tests/validation/test_decoding.py new file mode 100644 index 0000000..93d2050 --- /dev/null +++ b/xmlschema/tests/validation/test_decoding.py @@ -0,0 +1,738 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +import unittest +import os +from decimal import Decimal +import base64 +from elementpath import datatypes + +import xmlschema +from xmlschema import XMLSchemaValidationError, ParkerConverter, BadgerFishConverter, \ + AbderaConverter, JsonMLConverter + +from xmlschema.converters import UnorderedConverter +from xmlschema.compat import unicode_type, ordered_dict_class +from xmlschema.etree import ElementTree, lxml_etree +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + +VEHICLES_DICT = { + '@xmlns:vh': 'http://example.com/vehicles', + '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', + '@xsi:schemaLocation': 'http://example.com/vehicles vehicles.xsd', + 'vh:cars': { + 'vh:car': [ + {'@make': 'Porsche', '@model': '911'}, + {'@make': 'Porsche', '@model': '911'} + ]}, + 'vh:bikes': { + 'vh:bike': [ + {'@make': 'Harley-Davidson', '@model': 'WL'}, + {'@make': 'Yamaha', '@model': 'XS650'} + ]} +} + +VEHICLES_DICT_ALT = [ + {'vh:cars': [ + {'vh:car': None, '@make': 'Porsche', '@model': '911'}, + {'vh:car': None, '@make': 'Porsche', '@model': '911'} + ]}, + {'vh:bikes': [ + {'vh:bike': None, '@make': 'Harley-Davidson', '@model': 'WL'}, + {'vh:bike': None, '@make': 'Yamaha', '@model': 'XS650'} + ]}, + {'@xsi:schemaLocation': 'http://example.com/vehicles vehicles.xsd'} +] + +COLLECTION_DICT = { + '@xmlns:col': 'http://example.com/ns/collection', + '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', + '@xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd', + 'object': [{ + '@available': True, + '@id': 'b0836217462', + 'author': { + '@id': 'PAR', + 'born': '1841-02-25', + 'dead': '1919-12-03', + 'name': 'Pierre-Auguste Renoir', + 'qualification': 'painter' + }, + 'estimation': Decimal('10000.00'), + 'position': 1, + 'title': 'The Umbrellas', + 'year': '1886'}, + { + '@available': True, + '@id': 'b0836217463', + 'author': { + '@id': 'JM', + 'born': '1893-04-20', + 
'dead': '1983-12-25', + 'name': u'Joan Miró', + 'qualification': 'painter, sculptor and ceramicist' + }, + 'position': 2, + 'title': None, + 'year': '1925' + }] +} + +COLLECTION_PARKER = { + 'object': [{'author': {'born': '1841-02-25', + 'dead': '1919-12-03', + 'name': 'Pierre-Auguste Renoir', + 'qualification': 'painter'}, + 'estimation': 10000.0, + 'position': 1, + 'title': 'The Umbrellas', + 'year': '1886'}, + {'author': {'born': '1893-04-20', + 'dead': '1983-12-25', + 'name': u'Joan Miró', + 'qualification': 'painter, sculptor and ceramicist'}, + 'position': 2, + 'title': None, + 'year': '1925'}]} + +COLLECTION_PARKER_ROOT = { + 'col:collection': {'object': [{'author': {'born': '1841-02-25', + 'dead': '1919-12-03', + 'name': 'Pierre-Auguste Renoir', + 'qualification': 'painter'}, + 'estimation': 10000.0, + 'position': 1, + 'title': 'The Umbrellas', + 'year': '1886'}, + {'author': {'born': '1893-04-20', + 'dead': '1983-12-25', + 'name': u'Joan Miró', + 'qualification': 'painter, sculptor and ceramicist'}, + 'position': 2, + 'title': None, + 'year': '1925'}]}} + +COLLECTION_BADGERFISH = { + '@xmlns': { + 'col': 'http://example.com/ns/collection', + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}, + 'col:collection': { + '@xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd', + 'object': [{ + '@available': True, + '@id': 'b0836217462', + 'author': { + '@id': 'PAR', + 'born': {'$': '1841-02-25'}, + 'dead': {'$': '1919-12-03'}, + 'name': {'$': 'Pierre-Auguste Renoir'}, + 'qualification': {'$': 'painter'}}, + 'estimation': {'$': 10000.0}, + 'position': {'$': 1}, + 'title': {'$': 'The Umbrellas'}, + 'year': {'$': '1886'}}, + { + '@available': True, + '@id': 'b0836217463', + 'author': { + '@id': 'JM', + 'born': {'$': '1893-04-20'}, + 'dead': {'$': '1983-12-25'}, + 'name': {'$': u'Joan Miró'}, + 'qualification': { + '$': 'painter, sculptor and ceramicist'} + }, + 'position': {'$': 2}, + 'title': {}, + 'year': {'$': '1925'} + }] + } +} + 
+COLLECTION_ABDERA = { + 'attributes': { + 'xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd' + }, + 'children': [ + { + 'object': [ + { + 'attributes': {'available': True, 'id': 'b0836217462'}, + 'children': [{ + 'author': { + 'attributes': {'id': 'PAR'}, + 'children': [{ + 'born': '1841-02-25', + 'dead': '1919-12-03', + 'name': 'Pierre-Auguste Renoir', + 'qualification': 'painter'} + ]}, + 'estimation': 10000.0, + 'position': 1, + 'title': 'The Umbrellas', + 'year': '1886'} + ]}, + { + 'attributes': {'available': True, 'id': 'b0836217463'}, + 'children': [{ + 'author': { + 'attributes': {'id': 'JM'}, + 'children': [{ + 'born': '1893-04-20', + 'dead': '1983-12-25', + 'name': u'Joan Miró', + 'qualification': 'painter, sculptor and ceramicist'} + ]}, + 'position': 2, + 'title': [], + 'year': '1925' + }] + }] + } + ]} + +COLLECTION_JSON_ML = [ + 'col:collection', + {'xmlns:col': 'http://example.com/ns/collection', + 'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', + 'xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd'}, + ['object', + {'available': True, 'id': 'b0836217462'}, + ['position', 1], + ['title', 'The Umbrellas'], + ['year', '1886'], + [ + 'author', + {'id': 'PAR'}, + ['name', 'Pierre-Auguste Renoir'], + ['born', '1841-02-25'], + ['dead', '1919-12-03'], + ['qualification', 'painter'] + ], + [ + 'estimation', + Decimal('10000.00') + ]], + ['object', + {'available': True, 'id': 'b0836217463'}, + ['position', 2], + ['title'], + ['year', '1925'], + [ + 'author', + {'id': 'JM'}, + ['name', u'Joan Miró'], + ['born', '1893-04-20'], + ['dead', '1983-12-25'], + ['qualification', 'painter, sculptor and ceramicist'] + ]] +] + +DATA_DICT = { + '@xmlns:ns': 'ns', + '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', + '@xsi:schemaLocation': 'ns ./simple-types.xsd', + 'certification': [ + {'$': 'ISO-9001', '@Year': 1999}, + {'$': 'ISO-27001', '@Year': 2009} + ], + 'decimal_value': [Decimal('1')], + u'menù': 
u'baccalà mantecato', + u'complex_boolean': [ + {'$': True, '@Type': 2}, {'$': False, '@Type': 1}, True, False + ], + u'simple_boolean': [True, False] +} + + +class TestDecoding(XsdValidatorTestCase): + + def check_decode(self, xsd_component, data, expected, **kwargs): + if isinstance(expected, type) and issubclass(expected, Exception): + self.assertRaises(expected, xsd_component.decode, data, **kwargs) + else: + obj = xsd_component.decode(data, **kwargs) + if isinstance(obj, tuple) and len(obj) == 2 and isinstance(obj[1], list) \ + and isinstance(obj[1][0], Exception): + self.assertEqual(expected, obj[0]) + self.assertTrue(isinstance(obj[0], type(expected))) + else: + self.assertEqual(expected, obj) + self.assertTrue(isinstance(obj, type(expected))) + + @unittest.skipIf(lxml_etree is None, "The lxml library is not available.") + def test_lxml(self): + vh_xml_tree = lxml_etree.parse(self.vh_xml_file) + self.assertEqual(self.vh_schema.to_dict(vh_xml_tree), VEHICLES_DICT) + self.assertEqual(xmlschema.to_dict(vh_xml_tree, self.vh_schema.url), VEHICLES_DICT) + + def test_to_dict_from_etree(self): + vh_xml_tree = ElementTree.parse(self.vh_xml_file) + col_xml_tree = ElementTree.parse(self.col_xml_file) + + xml_dict = self.vh_schema.to_dict(vh_xml_tree) + self.assertNotEqual(xml_dict, VEHICLES_DICT) + + xml_dict = self.vh_schema.to_dict(vh_xml_tree, namespaces=self.vh_namespaces) + self.assertEqual(xml_dict, VEHICLES_DICT) + + xml_dict = xmlschema.to_dict(vh_xml_tree, self.vh_schema.url, namespaces=self.vh_namespaces) + self.assertEqual(xml_dict, VEHICLES_DICT) + + xml_dict = self.col_schema.to_dict(col_xml_tree) + self.assertNotEqual(xml_dict, COLLECTION_DICT) + + xml_dict = self.col_schema.to_dict(col_xml_tree, namespaces=self.col_namespaces) + self.assertEqual(xml_dict, COLLECTION_DICT) + + xml_dict = xmlschema.to_dict(col_xml_tree, self.col_schema.url, namespaces=self.col_namespaces) + self.assertEqual(xml_dict, COLLECTION_DICT) + + def test_to_dict_from_string(self): 
+ with open(self.vh_xml_file) as f: + vh_xml_string = f.read() + + with open(self.col_xml_file) as f: + col_xml_string = f.read() + + xml_dict = self.vh_schema.to_dict(vh_xml_string, namespaces=self.vh_namespaces) + self.assertEqual(xml_dict, VEHICLES_DICT) + + xml_dict = xmlschema.to_dict(vh_xml_string, self.vh_schema.url, namespaces=self.vh_namespaces) + self.assertEqual(xml_dict, VEHICLES_DICT) + + xml_dict = self.col_schema.to_dict(col_xml_string, namespaces=self.col_namespaces) + self.assertTrue(xml_dict, COLLECTION_DICT) + + xml_dict = xmlschema.to_dict(col_xml_string, self.col_schema.url, namespaces=self.col_namespaces) + self.assertTrue(xml_dict, COLLECTION_DICT) + + def test_date_decoding(self): + # Issue #136 + schema = xmlschema.XMLSchema(""" + + + + + + + + + + """) + + self.assertEqual(schema.to_dict("2019-01-01"), '2019-01-01') + self.assertEqual(schema.to_dict("2019-01-01", datetime_types=True), + datatypes.Date10.fromstring('2019-01-01')) + + data, errors = schema.to_dict("2019-01-01", validation='lax') + self.assertEqual(data, '2019-01-01') + self.assertEqual(errors, []) + + data, errors = schema.to_dict("2019-01-01", validation='lax', datetime_types=True) + self.assertEqual(data, datatypes.Date10.fromstring('2019-01-01')) + self.assertEqual(errors, []) + + data, errors = schema.to_dict("1999-12-31", validation='lax') + self.assertEqual(data, '1999-12-31') + self.assertEqual(len(errors), 1) + self.assertIn('value has to be greater or equal than', unicode_type(errors[0])) + + data, errors = schema.to_dict("1999-12-31", validation='lax', datetime_types=True) + self.assertEqual(data, datatypes.Date10.fromstring('1999-12-31')) + self.assertEqual(len(errors), 1) + + data, errors = schema.to_dict("2019", validation='lax') + self.assertIsNone(data) + self.assertEqual(len(errors), 1) + + with self.assertRaises(XMLSchemaValidationError): + schema.to_dict("2019") + + data, errors = schema.to_dict("2019", validation='lax') + self.assertIsNone(data) + 
self.assertEqual(len(errors), 1) + + def test_json_dump_and_load(self): + vh_xml_tree = ElementTree.parse(self.vh_xml_file) + col_xml_tree = ElementTree.parse(self.col_xml_file) + with open(self.vh_json_file, 'w') as f: + xmlschema.to_json(self.vh_xml_file, f) + + with open(self.vh_json_file) as f: + root = xmlschema.from_json(f, self.vh_schema) + + os.remove(self.vh_json_file) + self.check_etree_elements(vh_xml_tree, root) + + with open(self.col_json_file, 'w') as f: + xmlschema.to_json(self.col_xml_file, f) + + with open(self.col_json_file) as f: + root = xmlschema.from_json(f, self.col_schema) + + os.remove(self.col_json_file) + self.check_etree_elements(col_xml_tree, root) + + def test_path(self): + xt = ElementTree.parse(self.vh_xml_file) + xd = self.vh_schema.to_dict(xt, '/vh:vehicles/vh:cars', namespaces=self.vh_namespaces) + self.assertEqual(xd['vh:car'], VEHICLES_DICT['vh:cars']['vh:car']) + xd = self.vh_schema.to_dict(xt, '/vh:vehicles/vh:bikes', namespaces=self.vh_namespaces) + self.assertEqual(xd['vh:bike'], VEHICLES_DICT['vh:bikes']['vh:bike']) + + def test_validation_strict(self): + self.assertRaises( + xmlschema.XMLSchemaValidationError, + self.vh_schema.to_dict, + ElementTree.parse(self.casepath('examples/vehicles/vehicles-2_errors.xml')), + validation='strict', + namespaces=self.vh_namespaces + ) + + def test_validation_skip(self): + xt = ElementTree.parse(self.casepath('features/decoder/data3.xml')) + xd = self.st_schema.decode(xt, validation='skip', namespaces={'ns': 'ns'}) + self.assertEqual(xd['decimal_value'], ['abc']) + + def test_datatypes(self): + xt = ElementTree.parse(self.casepath('features/decoder/data.xml')) + xd = self.st_schema.to_dict(xt, namespaces=self.default_namespaces) + self.assertEqual(xd, DATA_DICT) + + def test_datetime_types(self): + xs = self.get_schema('') + self.assertEqual(xs.decode('
2019-01-01T13:40:00
'), '2019-01-01T13:40:00') + self.assertEqual(xs.decode('
2019-01-01T13:40:00
', datetime_types=True), + datatypes.DateTime10.fromstring('2019-01-01T13:40:00')) + + xs = self.get_schema('') + self.assertEqual(xs.decode('
2001-04-15
'), '2001-04-15') + self.assertEqual(xs.decode('
2001-04-15
', datetime_types=True), + datatypes.Date10.fromstring('2001-04-15')) + + def test_duration_type(self): + xs = self.get_schema('') + self.assertEqual(xs.decode('P5Y3MT60H30.001S'), 'P5Y3MT60H30.001S') + self.assertEqual(xs.decode('P5Y3MT60H30.001S', datetime_types=True), + datatypes.Duration.fromstring('P5Y3M2DT12H30.001S')) + + def test_default_converter(self): + self.assertEqual(self.col_schema.to_dict(self.col_xml_file), COLLECTION_DICT) + + default_dict = self.col_schema.to_dict(self.col_xml_file, converter=xmlschema.XMLSchemaConverter) + self.assertEqual(default_dict, COLLECTION_DICT) + + default_dict_root = self.col_schema.to_dict(self.col_xml_file, preserve_root=True) + self.assertEqual(default_dict_root, {'col:collection': COLLECTION_DICT}) + + def test_visitor_converter(self): + visitor_dict = self.col_schema.to_dict(self.col_xml_file, converter=UnorderedConverter) + self.assertEqual(visitor_dict, COLLECTION_DICT) + + visitor_dict_root = self.col_schema.to_dict( + self.col_xml_file, converter=UnorderedConverter(preserve_root=True)) + self.assertEqual(visitor_dict_root, {'col:collection': COLLECTION_DICT}) + + def test_parker_converter(self): + parker_dict = self.col_schema.to_dict(self.col_xml_file, converter=xmlschema.ParkerConverter) + self.assertEqual(parker_dict, COLLECTION_PARKER) + + parker_dict_root = self.col_schema.to_dict( + self.col_xml_file, converter=ParkerConverter(preserve_root=True), decimal_type=float) + self.assertEqual(parker_dict_root, COLLECTION_PARKER_ROOT) + + def test_badgerfish_converter(self): + badgerfish_dict = self.col_schema.to_dict( + self.col_xml_file, converter=BadgerFishConverter, decimal_type=float) + self.assertEqual(badgerfish_dict, COLLECTION_BADGERFISH) + + def test_abdera_converter(self): + abdera_dict = self.col_schema.to_dict( + self.col_xml_file, converter=AbderaConverter, decimal_type=float, dict_class=dict) + self.assertEqual(abdera_dict, COLLECTION_ABDERA) + + def test_json_ml_converter(self): + json_ml_dict = 
self.col_schema.to_dict(self.col_xml_file, converter=JsonMLConverter) + self.assertEqual(json_ml_dict, COLLECTION_JSON_ML) + + def test_dict_granularity(self): + """Based on Issue #22, test to make sure an xsd indicating list with + dictionaries, returns just that even when it has a single dict. """ + xsd_string = self.casepath('issues/issue_022/xsd_string.xsd') + xml_string_1 = self.casepath('issues/issue_022/xml_string_1.xml') + xml_string_2 = self.casepath('issues/issue_022/xml_string_2.xml') + xsd_schema = xmlschema.XMLSchema(xsd_string) + xml_data_1 = xsd_schema.to_dict(xml_string_1) + xml_data_2 = xsd_schema.to_dict(xml_string_2) + self.assertTrue(isinstance(xml_data_1['bar'], type(xml_data_2['bar'])), + msg="XSD with an array that return a single element from xml must still yield a list.") + + def test_any_type(self): + any_type = xmlschema.XMLSchema.meta_schema.types['anyType'] + xml_data_1 = ElementTree.Element('dummy') + self.assertEqual(any_type.decode(xml_data_1), (None, [], [])) + xml_data_2 = ElementTree.fromstring('\n \n \n') + self.assertEqual(any_type.decode(xml_data_2), (None, [], [])) # Currently no decoding yet + + def test_choice_model_decoding(self): + schema = xmlschema.XMLSchema(self.casepath('issues/issue_041/issue_041.xsd')) + data = schema.to_dict(self.casepath('issues/issue_041/issue_041.xml')) + self.assertEqual(data, { + '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', + '@xsi:noNamespaceSchemaLocation': 'issue_041.xsd', + 'Name': 'SomeNameValueThingy', + 'Value': {'Integer': 0} + }) + + def test_cdata_decoding(self): + schema = xmlschema.XMLSchema(self.casepath('issues/issue_046/issue_046.xsd')) + xml_file = self.casepath('issues/issue_046/issue_046.xml') + self.assertEqual( + schema.decode(xml_file, dict_class=ordered_dict_class, cdata_prefix='#'), + ordered_dict_class( + [('@xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'), + ('@xsi:noNamespaceSchemaLocation', 'issue_046.xsd'), + ('#1', 'Dear Mr.'), ('name', 'John 
Smith'), + ('#2', '.\n Your order'), ('orderid', 1032), + ('#3', 'will be shipped on'), ('shipdate', '2001-07-13'), ('#4', '.')] + )) + + def test_string_facets(self): + none_empty_string_type = self.st_schema.types['none_empty_string'] + self.check_decode(none_empty_string_type, '', XMLSchemaValidationError) + name_type = self.st_schema.types['NameType'] + self.check_decode(name_type, '', XMLSchemaValidationError) + + def test_binary_data_facets(self): + hex_code_type = self.st_schema.types['hexCode'] + self.check_decode(hex_code_type, u'00D7310A', u'00D7310A') + + base64_code_type = self.st_schema.types['base64Code'] + self.check_decode(base64_code_type, base64.b64encode(b'ok'), XMLSchemaValidationError) + base64_value = base64.b64encode(b'hello') + self.check_decode(base64_code_type, base64_value, base64_value.decode('utf-8')) + self.check_decode(base64_code_type, base64.b64encode(b'abcefgh'), u'YWJjZWZnaA==') + self.check_decode(base64_code_type, b' Y W J j ZWZ\t\tn\na A= =', u'Y W J j ZWZ n a A= =') + self.check_decode(base64_code_type, u' Y W J j ZWZ\t\tn\na A= =', u'Y W J j ZWZ n a A= =') + self.check_decode(base64_code_type, base64.b64encode(b'abcefghi'), u'YWJjZWZnaGk=') + + self.check_decode(base64_code_type, u'YWJjZWZnaA=', XMLSchemaValidationError) + self.check_decode(base64_code_type, u'YWJjZWZna$==', XMLSchemaValidationError) + + base64_length4_type = self.st_schema.types['base64Length4'] + self.check_decode(base64_length4_type, base64.b64encode(b'abc'), XMLSchemaValidationError) + self.check_decode(base64_length4_type, base64.b64encode(b'abce'), u'YWJjZQ==') + self.check_decode(base64_length4_type, base64.b64encode(b'abcef'), XMLSchemaValidationError) + + base64_length5_type = self.st_schema.types['base64Length5'] + self.check_decode(base64_length5_type, base64.b64encode(b'1234'), XMLSchemaValidationError) + self.check_decode(base64_length5_type, base64.b64encode(b'12345'), u'MTIzNDU=') + self.check_decode(base64_length5_type, 
base64.b64encode(b'123456'), XMLSchemaValidationError) + + def test_decimal_type(self): + schema = self.get_schema(""" + + + + + + + """) + + self.check_decode(schema, '120.48', Decimal('120.48')) + self.check_decode(schema, '100.50', Decimal('100.50'), process_namespaces=False) + self.check_decode(schema, '100.49', XMLSchemaValidationError) + self.check_decode(schema, '120.48', 120.48, decimal_type=float) + # Issue #66 + self.check_decode(schema, '120.48', '120.48', decimal_type=str) + + def test_nillable(self): + # Issue #76 + xsd_string = """ + + + + + + + + + """ + xsd_schema = xmlschema.XMLSchema(xsd_string) + xml_string_1 = "0" + xml_string_2 = """ + + + + """ + self.assertTrue(xsd_schema.is_valid(source=xml_string_1, use_defaults=False)) + self.assertTrue(xsd_schema.is_valid(source=xml_string_2, use_defaults=False)) + obj = xsd_schema.decode(xml_string_2, use_defaults=False) + self.check_etree_elements(ElementTree.fromstring(xml_string_2), xsd_schema.encode(obj)) + + def test_default_namespace(self): + # Issue #77 + xs = xmlschema.XMLSchema(""" + + + """) + self.assertEqual(xs.to_dict("""bar""", + path='/foo', namespaces={'': 'http://example.com/foo'}), 'bar') + self.assertEqual(xs.to_dict("""bar""", + path='/foo', namespaces={'': 'http://example.com/foo'}), None) + + def test_complex_with_simple_content_restriction(self): + xs = self.schema_class(self.casepath('features/derivations/complex-with-simple-content-restriction.xsd')) + self.assertTrue(xs.is_valid('10')) + self.assertFalse(xs.is_valid('alpha')) + self.assertEqual(xs.decode('10'), 10) + + def test_union_types(self): + # For testing issue #103 + decimal_or_nan = self.st_schema.types['myType'] + self.check_decode(decimal_or_nan, '95.0', Decimal('95.0')) + self.check_decode(decimal_or_nan, 'NaN', u'NaN') + + def test_default_values(self): + # From issue #108 + xsd_text = """ + + + + + + + + + + + + + """ + + schema = self.schema_class(xsd_text) + self.assertEqual(schema.to_dict("text"), + 
{'@attrWithDefault': 'default_value', + '@attrWithFixed': 'fixed_value', + '$': 'text'}) + self.assertEqual(schema.to_dict(""), + {'@attrWithDefault': 'default_value', + '@attrWithFixed': 'fixed_value', + '$': 'default_value'}) + self.assertEqual(schema.to_dict("""text"""), + {'$': 'text', + '@attr': 'attr_value', + '@attrWithDefault': 'default_value', + '@attrWithFixed': 'fixed_value'}) + + self.assertEqual(schema.to_dict("text", use_defaults=False), + {'@attrWithFixed': 'fixed_value', '$': 'text'}) + self.assertEqual(schema.to_dict("""text""", use_defaults=False), + {'$': 'text', '@attr': 'attr_value', '@attrWithFixed': 'fixed_value'}) + self.assertEqual(schema.to_dict("", use_defaults=False), {'@attrWithFixed': 'fixed_value'}) + + self.assertEqual(schema.to_dict(""), 'default_value') + self.assertIsNone(schema.to_dict("", use_defaults=False)) + + def test_validation_errors(self): + xsd_text = """ + + + + + + + + + + + + """ + + schema = self.schema_class(xsd_text) + + self.assertIsNone(schema.to_dict("alpha", validation='lax')[0]) + self.assertEqual(schema.to_dict("20"), {'@int_attr': 10, '$': 20}) + self.assertEqual(schema.to_dict("20", validation='lax')[0], + {'@int_attr': None, '$': 20}) + self.assertEqual(schema.to_dict("20", validation='skip'), + {'@int_attr': 'wrong', '$': 20}) + + def test_error_message(self): + schema = self.schema_class(self.casepath('issues/issue_115/Rotation.xsd')) + rotation_data = '' + + message_lines = [] + try: + schema.decode(rotation_data) + except Exception as err: + message_lines = unicode_type(err).split('\n') + + self.assertTrue(message_lines, msg="Empty error message!") + self.assertEqual(message_lines[-6], 'Instance:') + self.assertEqual(message_lines[-4].strip(), rotation_data) + self.assertEqual(message_lines[-2], 'Path: /tns:rotation') + + +class TestDecoding11(TestDecoding): + schema_class = XMLSchema11 + + def test_datetime_types(self): + xs = self.get_schema('') + self.assertEqual(xs.decode('
2019-01-01T13:40:00
'), '2019-01-01T13:40:00') + self.assertEqual(xs.decode('
2019-01-01T13:40:00
', datetime_types=True), + datatypes.DateTime.fromstring('2019-01-01T13:40:00')) + + xs = self.get_schema('') + self.assertEqual(xs.decode('
2001-04-15
'), '2001-04-15') + self.assertEqual(xs.decode('
2001-04-15
', datetime_types=True), + datatypes.Date.fromstring('2001-04-15')) + + def test_derived_duration_types(self): + xs = self.get_schema('') + self.assertEqual(xs.decode('P0Y4M'), 'P0Y4M') + self.assertEqual(xs.decode('P2Y10M', datetime_types=True), + datatypes.Duration.fromstring('P2Y10M')) + + xs = self.get_schema('') + self.assertEqual(xs.decode('P2DT6H30M30.001S'), 'P2DT6H30M30.001S') + self.assertEqual(xs.decode('P2DT26H'), 'P2DT26H') + self.assertEqual(xs.decode('P2DT6H30M30.001S', datetime_types=True), + datatypes.Duration.fromstring('P2DT6H30M30.001S')) + + def test_type_alternatives(self): + xs = self.schema_class(self.casepath('features/elements/type_alternatives-no-ns.xsd')) + self.assertTrue(xs.is_valid('10')) + self.assertFalse(xs.is_valid('10.1')) + self.assertTrue(xs.is_valid('10.1')) + self.assertFalse(xs.is_valid('alpha')) + self.assertFalse(xs.is_valid('alpha')) + self.assertTrue(xs.is_valid('0')) + self.assertTrue(xs.is_valid('true')) + + xs = self.schema_class(self.casepath('features/elements/type_alternatives.xsd')) + self.assertTrue(xs.is_valid('10')) + self.assertFalse(xs.is_valid('10.1')) + self.assertTrue(xs.is_valid('10.1')) + self.assertFalse(xs.is_valid('alpha')) + self.assertFalse(xs.is_valid('alpha')) + self.assertTrue(xs.is_valid('0')) + self.assertTrue(xs.is_valid('true')) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validation/test_encoding.py b/xmlschema/tests/validation/test_encoding.py new file mode 100644 index 0000000..ffa6623 --- /dev/null +++ b/xmlschema/tests/validation/test_encoding.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. 
+# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +import sys +import unittest + +from xmlschema import XMLSchemaEncodeError, XMLSchemaValidationError +from xmlschema.converters import UnorderedConverter +from xmlschema.compat import unicode_type, ordered_dict_class +from xmlschema.qnames import local_name +from xmlschema.etree import etree_element, etree_tostring, ElementTree +from xmlschema.validators.exceptions import XMLSchemaChildrenValidationError +from xmlschema.helpers import is_etree_element +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestEncoding(XsdValidatorTestCase): + + def check_encode(self, xsd_component, data, expected, **kwargs): + if isinstance(expected, type) and issubclass(expected, Exception): + self.assertRaises(expected, xsd_component.encode, data, **kwargs) + elif is_etree_element(expected): + elem = xsd_component.encode(data, **kwargs) + self.check_etree_elements(expected, elem) + else: + obj = xsd_component.encode(data, **kwargs) + if isinstance(obj, tuple) and len(obj) == 2 and isinstance(obj[1], list): + self.assertEqual(expected, obj[0]) + self.assertTrue(isinstance(obj[0], type(expected))) + elif is_etree_element(obj): + namespaces = kwargs.pop('namespaces', self.default_namespaces) + self.assertEqual(expected, etree_tostring(obj, namespaces=namespaces).strip()) + else: + self.assertEqual(expected, obj) + self.assertTrue(isinstance(obj, type(expected))) + + def test_decode_encode(self): + """Test encode after a decode, checking the re-encoded tree.""" + filename = self.casepath('examples/collection/collection.xml') + xt = ElementTree.parse(filename) + xd = self.col_schema.to_dict(filename, dict_class=ordered_dict_class) + elem = self.col_schema.encode(xd, path='./col:collection', namespaces=self.col_namespaces) + + self.assertEqual( + len([e for e in elem.iter()]), 20, + 
msg="The encoded tree must have 20 elements as the origin." + ) + self.assertTrue(all( + local_name(e1.tag) == local_name(e2.tag) + for e1, e2 in zip(elem.iter(), xt.getroot().iter()) + )) + + def test_string_based_builtin_types(self): + self.check_encode(self.xsd_types['string'], 'sample string ', u'sample string ') + self.check_encode(self.xsd_types['normalizedString'], ' sample string ', u' sample string ') + self.check_encode(self.xsd_types['normalizedString'], '\n\r sample\tstring\n', u' sample string ') + self.check_encode(self.xsd_types['token'], '\n\r sample\t\tstring\n ', u'sample string') + self.check_encode(self.xsd_types['language'], 'sample string', XMLSchemaValidationError) + self.check_encode(self.xsd_types['language'], ' en ', u'en') + self.check_encode(self.xsd_types['Name'], 'first_name', u'first_name') + self.check_encode(self.xsd_types['Name'], ' first_name ', u'first_name') + self.check_encode(self.xsd_types['Name'], 'first name', XMLSchemaValidationError) + self.check_encode(self.xsd_types['Name'], '1st_name', XMLSchemaValidationError) + self.check_encode(self.xsd_types['Name'], 'first_name1', u'first_name1') + self.check_encode(self.xsd_types['Name'], 'first:name', u'first:name') + self.check_encode(self.xsd_types['NCName'], 'first_name', u'first_name') + self.check_encode(self.xsd_types['NCName'], 'first:name', XMLSchemaValidationError) + self.check_encode(self.xsd_types['ENTITY'], 'first:name', XMLSchemaValidationError) + self.check_encode(self.xsd_types['ID'], 'first:name', XMLSchemaValidationError) + self.check_encode(self.xsd_types['IDREF'], 'first:name', XMLSchemaValidationError) + + def test_decimal_based_builtin_types(self): + self.check_encode(self.xsd_types['decimal'], -99.09, u'-99.09') + self.check_encode(self.xsd_types['decimal'], '-99.09', u'-99.09') + self.check_encode(self.xsd_types['integer'], 1000, u'1000') + self.check_encode(self.xsd_types['integer'], 100.0, XMLSchemaEncodeError) + 
self.check_encode(self.xsd_types['integer'], 100.0, u'100', validation='lax') + self.check_encode(self.xsd_types['short'], 1999, u'1999') + self.check_encode(self.xsd_types['short'], 10000000, XMLSchemaValidationError) + self.check_encode(self.xsd_types['float'], 100.0, u'100.0') + self.check_encode(self.xsd_types['float'], 'hello', XMLSchemaEncodeError) + self.check_encode(self.xsd_types['double'], -4531.7, u'-4531.7') + self.check_encode(self.xsd_types['positiveInteger'], -1, XMLSchemaValidationError) + self.check_encode(self.xsd_types['positiveInteger'], 0, XMLSchemaValidationError) + self.check_encode(self.xsd_types['nonNegativeInteger'], 0, u'0') + self.check_encode(self.xsd_types['nonNegativeInteger'], -1, XMLSchemaValidationError) + self.check_encode(self.xsd_types['negativeInteger'], -100, u'-100') + self.check_encode(self.xsd_types['nonPositiveInteger'], 7, XMLSchemaValidationError) + self.check_encode(self.xsd_types['unsignedLong'], 101, u'101') + self.check_encode(self.xsd_types['unsignedLong'], -101, XMLSchemaValidationError) + self.check_encode(self.xsd_types['nonPositiveInteger'], 7, XMLSchemaValidationError) + + def test_list_builtin_types(self): + self.check_encode(self.xsd_types['IDREFS'], ['first_name'], u'first_name') + self.check_encode(self.xsd_types['IDREFS'], 'first_name', u'first_name') # Transform data to list + self.check_encode(self.xsd_types['IDREFS'], ['one', 'two', 'three'], u'one two three') + self.check_encode(self.xsd_types['IDREFS'], [1, 'two', 'three'], XMLSchemaValidationError) + self.check_encode(self.xsd_types['NMTOKENS'], ['one', 'two', 'three'], u'one two three') + self.check_encode(self.xsd_types['ENTITIES'], ('mouse', 'cat', 'dog'), u'mouse cat dog') + + def test_datetime_builtin_type(self): + xs = self.get_schema('') + dt = xs.decode('
2019-01-01T13:40:00
', datetime_types=True) + self.assertEqual(etree_tostring(xs.encode(dt)), '
2019-01-01T13:40:00
') + + def test_date_builtin_type(self): + xs = self.get_schema('') + date = xs.decode('
2001-04-15
', datetime_types=True) + self.assertEqual(etree_tostring(xs.encode(date)), '
2001-04-15
') + + def test_duration_builtin_type(self): + xs = self.get_schema('') + duration = xs.decode('P5Y3MT60H30.001S', datetime_types=True) + self.assertEqual(etree_tostring(xs.encode(duration)), 'P5Y3M2DT12H30.001S') + + def test_gregorian_year_builtin_type(self): + xs = self.get_schema('') + gyear = xs.decode('2000', datetime_types=True) + self.assertEqual(etree_tostring(xs.encode(gyear)), '2000') + + def test_gregorian_yearmonth_builtin_type(self): + xs = self.get_schema('') + gyear_month = xs.decode('2000-12', datetime_types=True) + self.assertEqual(etree_tostring(xs.encode(gyear_month)), '2000-12') + + def test_list_types(self): + list_of_strings = self.st_schema.types['list_of_strings'] + self.check_encode(list_of_strings, (10, 25, 40), u'', validation='lax') + self.check_encode(list_of_strings, (10, 25, 40), u'10 25 40', validation='skip') + self.check_encode(list_of_strings, ['a', 'b', 'c'], u'a b c', validation='skip') + + list_of_integers = self.st_schema.types['list_of_integers'] + self.check_encode(list_of_integers, (10, 25, 40), u'10 25 40') + self.check_encode(list_of_integers, (10, 25.0, 40), XMLSchemaValidationError) + self.check_encode(list_of_integers, (10, 25.0, 40), u'10 25 40', validation='lax') + + list_of_floats = self.st_schema.types['list_of_floats'] + self.check_encode(list_of_floats, [10.1, 25.0, 40.0], u'10.1 25.0 40.0') + self.check_encode(list_of_floats, [10.1, 25, 40.0], u'10.1 25.0 40.0', validation='lax') + self.check_encode(list_of_floats, [10.1, False, 40.0], u'10.1 0.0 40.0', validation='lax') + + list_of_booleans = self.st_schema.types['list_of_booleans'] + self.check_encode(list_of_booleans, [True, False, True], u'true false true') + self.check_encode(list_of_booleans, [10, False, True], XMLSchemaEncodeError) + self.check_encode(list_of_booleans, [True, False, 40.0], u'true false', validation='lax') + self.check_encode(list_of_booleans, [True, False, 40.0], u'true false 40.0', validation='skip') + + def test_union_types(self): + 
integer_or_float = self.st_schema.types['integer_or_float'] + self.check_encode(integer_or_float, -95, u'-95') + self.check_encode(integer_or_float, -95.0, u'-95.0') + self.check_encode(integer_or_float, True, XMLSchemaEncodeError) + self.check_encode(integer_or_float, True, u'1', validation='lax') + + integer_or_string = self.st_schema.types['integer_or_string'] + self.check_encode(integer_or_string, 89, u'89') + self.check_encode(integer_or_string, 89.0, u'89', validation='lax') + self.check_encode(integer_or_string, 89.0, XMLSchemaEncodeError) + self.check_encode(integer_or_string, False, XMLSchemaEncodeError) + self.check_encode(integer_or_string, "Venice ", u'Venice ') + + boolean_or_integer_or_string = self.st_schema.types['boolean_or_integer_or_string'] + self.check_encode(boolean_or_integer_or_string, 89, u'89') + self.check_encode(boolean_or_integer_or_string, 89.0, u'89', validation='lax') + self.check_encode(boolean_or_integer_or_string, 89.0, XMLSchemaEncodeError) + self.check_encode(boolean_or_integer_or_string, False, u'false') + self.check_encode(boolean_or_integer_or_string, "Venice ", u'Venice ') + + def test_simple_elements(self): + elem = etree_element('A') + elem.text = '89' + self.check_encode(self.get_element('A', type='xs:string'), '89', elem) + self.check_encode(self.get_element('A', type='xs:integer'), 89, elem) + elem.text = '-10.4' + self.check_encode(self.get_element('A', type='xs:float'), -10.4, elem) + elem.text = 'false' + self.check_encode(self.get_element('A', type='xs:boolean'), False, elem) + elem.text = 'true' + self.check_encode(self.get_element('A', type='xs:boolean'), True, elem) + + self.check_encode(self.get_element('A', type='xs:short'), 128000, XMLSchemaValidationError) + elem.text = '0' + self.check_encode(self.get_element('A', type='xs:nonNegativeInteger'), 0, elem) + self.check_encode(self.get_element('A', type='xs:nonNegativeInteger'), '0', XMLSchemaValidationError) + self.check_encode(self.get_element('A', 
type='xs:positiveInteger'), 0, XMLSchemaValidationError) + elem.text = '-1' + self.check_encode(self.get_element('A', type='xs:negativeInteger'), -1, elem) + self.check_encode(self.get_element('A', type='xs:nonNegativeInteger'), -1, XMLSchemaValidationError) + + def test_complex_elements(self): + schema = self.get_schema(""" + + + + + + + + + + """) + self.check_encode( + schema.elements['A'], data={'@a1': 10, '@a2': -1, '$': 'simple '}, + expected='simple ', + ) + self.check_encode( + schema.elements['A'], {'@a1': 10, '@a2': -1, '$': 'simple '}, + ElementTree.fromstring('simple '), + ) + self.check_encode( + schema.elements['A'], {'@a1': 10, '@a2': -1}, + ElementTree.fromstring('') + ) + self.check_encode( + schema.elements['A'], {'@a1': 10, '$': 'simple '}, + ElementTree.fromstring('simple ') + ) + self.check_encode(schema.elements['A'], {'@a2': -1, '$': 'simple '}, XMLSchemaValidationError) + + schema = self.get_schema(""" + + + + + + + + + """) + self.check_encode( + xsd_component=schema.elements['A'], + data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('B3', False)]), + expected=u'\nabc\n10\nfalse\n', + indent=0, + ) + self.check_encode(schema.elements['A'], {'B1': 'abc', 'B2': 10, 'B4': False}, XMLSchemaValidationError) + + def test_error_message(self): + schema = self.schema_class(self.casepath('issues/issue_115/Rotation.xsd')) + rotation_data = { + "@roll": 0.0, + "@pitch": 0.0, + "@yaw": -1.0 # <----- invalid value, must be between 0 and 360 + } + + message_lines = [] + try: + schema.encode(rotation_data) + except Exception as err: + message_lines = unicode_type(err).split('\n') + + self.assertTrue(message_lines, msg="Empty error message!") + self.assertEqual(message_lines[-4], 'Instance:') + if sys.version_info < (3, 8): + text = '' + else: + text = '' + self.assertEqual(message_lines[-2].strip(), text) + + def test_max_occurs_sequence(self): + # Issue #119 + schema = self.get_schema(""" + + + + + + + """) + + # Check validity + 
self.assertIsNone(schema.validate("1")) + self.assertIsNone(schema.validate("12")) + with self.assertRaises(XMLSchemaChildrenValidationError): + schema.validate("123") + + self.assertTrue(is_etree_element(schema.to_etree({'A': 1}, path='foo'))) + self.assertTrue(is_etree_element(schema.to_etree({'A': [1]}, path='foo'))) + self.assertTrue(is_etree_element(schema.to_etree({'A': [1, 2]}, path='foo'))) + with self.assertRaises(XMLSchemaChildrenValidationError): + schema.to_etree({'A': [1, 2, 3]}, path='foo') + + schema = self.get_schema(""" + + + + + + + + """) + + self.assertTrue(is_etree_element(schema.to_etree({'A': [1, 2]}, path='foo'))) + with self.assertRaises(XMLSchemaChildrenValidationError): + schema.to_etree({'A': [1, 2, 3]}, path='foo') + + def test_encode_unordered_content(self): + schema = self.get_schema(""" + + + + + + + + + """) + + self.check_encode( + xsd_component=schema.elements['A'], + data=ordered_dict_class([('B2', 10), ('B1', 'abc'), ('B3', True)]), + expected=XMLSchemaChildrenValidationError + ) + self.check_encode( + xsd_component=schema.elements['A'], + data=ordered_dict_class([('B2', 10), ('B1', 'abc'), ('B3', True)]), + expected=u'\nabc\n10\ntrue\n', + indent=0, cdata_prefix='#', converter=UnorderedConverter + ) + + self.check_encode( + xsd_component=schema.elements['A'], + data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello'), ('B3', True)]), + expected='\nhelloabc\n10\ntrue\n', + indent=0, cdata_prefix='#', converter=UnorderedConverter + ) + self.check_encode( + xsd_component=schema.elements['A'], + data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello'), ('B3', True)]), + expected=u'\nabc\n10\nhello\ntrue\n', + indent=0, cdata_prefix='#' + ) + self.check_encode( + xsd_component=schema.elements['A'], + data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello')]), + expected=XMLSchemaValidationError, indent=0, cdata_prefix='#' + ) + + def test_strict_trailing_content(self): + """Too many elements for a 
group raises an exception.""" + schema = self.get_schema(""" + + + + + + + + """) + self.check_encode( + schema.elements['foo'], + data={"A": [1, 2, 3]}, + expected=XMLSchemaChildrenValidationError, + ) + + def test_unordered_converter_repeated_sequence_of_elements(self): + schema = self.get_schema(""" + + + + + + + + + """) + + root = schema.to_etree(ordered_dict_class([('A', [1, 2]), ('B', [3, 4])])) + self.assertListEqual([e.text for e in root], ['1', '3', '2', '4']) + + root = schema.to_etree({"A": [1, 2], "B": [3, 4]}, converter=UnorderedConverter) + self.assertListEqual([e.text for e in root], ['1', '3', '2', '4']) + + root = schema.to_etree({"A": [1, 2], "B": [3, 4]}, unordered=True) + self.assertListEqual([e.text for e in root], ['1', '3', '2', '4']) + + +class TestEncoding11(TestEncoding): + schema_class = XMLSchema11 + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validation/test_validation.py b/xmlschema/tests/validation/test_validation.py new file mode 100644 index 0000000..3ba4ba4 --- /dev/null +++ b/xmlschema/tests/validation/test_validation.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +import unittest + +import xmlschema +from xmlschema import XMLSchemaValidationError + +from xmlschema.etree import ElementTree, lxml_etree +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestValidation(XsdValidatorTestCase): + + def check_validity(self, xsd_component, data, expected, use_defaults=True): + if isinstance(expected, type) and issubclass(expected, Exception): + self.assertRaises(expected, xsd_component.is_valid, data, use_defaults=use_defaults) + elif expected: + self.assertTrue(xsd_component.is_valid(data, use_defaults=use_defaults)) + else: + self.assertFalse(xsd_component.is_valid(data, use_defaults=use_defaults)) + + @unittest.skipIf(lxml_etree is None, "The lxml library is not available.") + def test_lxml(self): + xs = xmlschema.XMLSchema(self.casepath('examples/vehicles/vehicles.xsd')) + xt1 = lxml_etree.parse(self.casepath('examples/vehicles/vehicles.xml')) + xt2 = lxml_etree.parse(self.casepath('examples/vehicles/vehicles-1_error.xml')) + self.assertTrue(xs.is_valid(xt1)) + self.assertFalse(xs.is_valid(xt2)) + self.assertTrue(xs.validate(xt1) is None) + self.assertRaises(xmlschema.XMLSchemaValidationError, xs.validate, xt2) + + def test_issue_064(self): + self.check_validity(self.st_schema, '', False) + + def test_document_validate_api(self): + self.assertIsNone(xmlschema.validate(self.vh_xml_file)) + self.assertIsNone(xmlschema.validate(self.vh_xml_file, use_defaults=False)) + + vh_2_file = self.casepath('examples/vehicles/vehicles-2_errors.xml') + self.assertRaises(XMLSchemaValidationError, xmlschema.validate, vh_2_file) + + try: + xmlschema.validate(vh_2_file, namespaces={'vhx': "http://example.com/vehicles"}) + except XMLSchemaValidationError as err: + path_line = str(err).splitlines()[-1] + else: + path_line = '' + self.assertEqual('Path: /vhx:vehicles/vhx:cars', path_line) + + # Issue #80 + vh_2_xt = ElementTree.parse(vh_2_file) + 
self.assertRaises(XMLSchemaValidationError, xmlschema.validate, vh_2_xt, self.vh_xsd_file) + + def test_document_validate_api_lazy(self): + source = xmlschema.XMLResource(self.col_xml_file, lazy=True) + namespaces = source.get_namespaces() + source.root[0].clear() # Drop internal elements + source.root[1].clear() + xsd_element = self.col_schema.elements['collection'] + + self.assertRaises(XMLSchemaValidationError, xsd_element.decode, source.root, namespaces=namespaces) + + # Testing adding 'no_depth' argument + for result in xsd_element.iter_decode(source.root, 'strict', namespaces=namespaces, + source=source, no_depth=True): + del result + + self.assertIsNone(xmlschema.validate(self.col_xml_file, lazy=True)) + + +class TestValidation11(TestValidation): + schema_class = XMLSchema11 + + def test_default_attributes(self): + """ + Root Node + """ + xs = self.schema_class(self.casepath('features/attributes/default_attributes.xsd')) + self.assertTrue(xs.is_valid("" + " alpha" + " beta" + "")) + self.assertFalse(xs.is_valid("" + " alpha" # Misses required attribute + " beta" + "")) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validators/__init__.py b/xmlschema/tests/validators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/xmlschema/tests/validators/test_attributes.py b/xmlschema/tests/validators/test_attributes.py new file mode 100644 index 0000000..19fe05e --- /dev/null +++ b/xmlschema/tests/validators/test_attributes.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import unittest + +from xmlschema import XMLSchemaParseError +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestXsdAttributes(XsdValidatorTestCase): + + def test_wrong_attribute(self): + self.check_schema(""" + + + + + """, XMLSchemaParseError) + + def test_wrong_attribute_group(self): + self.check_schema(""" + + + + + """, XMLSchemaParseError) + + schema = self.check_schema(""" + + + + + """, validation='lax') + self.assertTrue(isinstance(schema.all_errors[1], XMLSchemaParseError)) + + +class TestXsd11Attributes(TestXsdAttributes): + + schema_class = XMLSchema11 + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validators/test_complex_types.py b/xmlschema/tests/validators/test_complex_types.py new file mode 100644 index 0000000..263f02c --- /dev/null +++ b/xmlschema/tests/validators/test_complex_types.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import unittest + +from xmlschema import XMLSchemaParseError, XMLSchemaModelError +from xmlschema.etree import etree_element +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestXsdComplexType(XsdValidatorTestCase): + + def check_complex_restriction(self, base, restriction, expected=None, **kwargs): + content = 'complex' if self.content_pattern.search(base) else 'simple' + source = """ + + {0} + + + + + {2} + + + + """.format(base.strip(), content, restriction.strip()) + self.check_schema(source, expected, **kwargs) + + def test_element_restrictions(self): + base = """ + + + + + + """ + self.check_complex_restriction( + base, restriction=""" + + + + + + """ + ) + self.check_complex_restriction( + base, restriction=""" + + + + + + """, expected=XMLSchemaParseError + ) + self.check_complex_restriction( + base, restriction=""" + + + + + + """, expected=XMLSchemaParseError + ) + self.check_complex_restriction( + base, restriction=""" + + + + + + """, expected=XMLSchemaParseError + ) + self.check_complex_restriction( + base, restriction=""" + + + + + + """, expected=XMLSchemaParseError + ) + + def test_sequence_group_restriction(self): + # Meaningless sequence group + base = """ + + + + + + + """ + self.check_complex_restriction( + base, restriction=""" + + + + + """ + ) + self.check_complex_restriction( + base, restriction=""" + + + + + """, expected=XMLSchemaParseError + ) + + base = """ + + + + + """ + self.check_complex_restriction(base, '') + self.check_complex_restriction( + base, '', XMLSchemaParseError + ) + self.check_complex_restriction( + base, '', XMLSchemaParseError + ) + self.check_complex_restriction( + base, '' + ) + self.check_complex_restriction( + base, '', XMLSchemaParseError + ) + self.check_complex_restriction( + base, '', + XMLSchemaParseError + ) + self.check_complex_restriction( + base, '', + 
XMLSchemaParseError + ) + + def test_all_group_restriction(self): + base = """ + + + + + + """ + self.check_complex_restriction( + base, restriction=""" + + + + + """) + self.check_complex_restriction( + base, restriction=""" + + + + + """, expected=XMLSchemaParseError if self.schema_class.XSD_VERSION == '1.0' else None + ) + self.check_complex_restriction( + base, restriction=""" + + + + + """) + self.check_complex_restriction( + base, '', + ) + self.check_complex_restriction( + base, restriction=""" + + + + + """, expected=XMLSchemaParseError + ) + self.check_complex_restriction( + base, restriction=""" + + + + + """, expected=XMLSchemaParseError + ) + + base = """ + + + + """ + self.check_complex_restriction(base, '', XMLSchemaParseError) + + def test_choice_group_restriction(self): + base = """ + + + + + + """ + self.check_complex_restriction(base, '') + self.check_complex_restriction( + base, '', + XMLSchemaParseError if self.schema_class.XSD_VERSION == '1.0' else None + ) + self.check_complex_restriction( + base, '', + ) + + def test_occurs_restriction(self): + base = """ + + + + """ + self.check_complex_restriction( + base, '') + self.check_complex_restriction( + base, '') + self.check_complex_restriction( + base, '', + XMLSchemaParseError + ) + self.check_complex_restriction( + base, '', + XMLSchemaParseError + ) + + def test_recursive_complex_type(self): + schema = self.schema_class(""" + + + + + + + + """) + self.assertEqual(schema.elements['elemA'].type, schema.types['typeA']) + + def test_upa_violations(self): + self.check_schema(""" + + + + + + + + + """, XMLSchemaModelError) + + self.check_schema(""" + + + + + + + + + """) + + def test_upa_violation_with_wildcard(self): + self.check_schema(""" + + + + + + + + + + + + + + + + + + + + + + """, XMLSchemaModelError if self.schema_class.XSD_VERSION == '1.0' else None) + + +class TestXsd11ComplexType(TestXsdComplexType): + + schema_class = XMLSchema11 + + def test_complex_type_assertion(self): + schema = 
self.check_schema(""" + + + + + """) + + xsd_type = schema.types['intRange'] + xsd_type.decode(etree_element('a', attrib={'min': '10', 'max': '19'})) + self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '10', 'max': '19'}))) + self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '19', 'max': '19'}))) + self.assertFalse(xsd_type.is_valid(etree_element('a', attrib={'min': '25', 'max': '19'}))) + self.assertTrue(xsd_type.is_valid(etree_element('a', attrib={'min': '25', 'max': '100'}))) + + def test_sequence_extension(self): + schema = self.schema_class(""" + + + + + + + + + + + + + + + + + + + + + + """) + + base_group = schema.types['base'].content_type + self.assertEqual(base_group.model, 'sequence') + self.assertEqual(base_group[0].name, 'a') + self.assertEqual(base_group[1].name, 'b') + self.assertEqual(base_group[2].name, 'c') + self.assertEqual(len(base_group), 3) + + ext_group = schema.types['ext'].content_type + self.assertEqual(ext_group.model, 'sequence') + self.assertEqual(len(ext_group), 2) + self.assertEqual(ext_group[0].model, 'sequence') + self.assertEqual(ext_group[1].model, 'sequence') + self.assertEqual(ext_group[0][0].name, 'a') + self.assertEqual(ext_group[0][1].name, 'b') + self.assertEqual(ext_group[0][2].name, 'c') + self.assertEqual(len(ext_group[0]), 3) + self.assertEqual(ext_group[1][0].name, 'd') + self.assertEqual(len(ext_group[1]), 1) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validators/test_identities.py b/xmlschema/tests/validators/test_identities.py new file mode 100644 index 0000000..15fbcec --- /dev/null +++ b/xmlschema/tests/validators/test_identities.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. 
+# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import unittest + +from xmlschema import XMLSchemaParseError +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestXsdIdentities(XsdValidatorTestCase): + + def test_key_definition(self): + self.check_schema(""" + + + + + + + """) + + self.check_schema(""" + + + + + + + + + + + + + """, XMLSchemaParseError) + + +class TestXsd11Identities(TestXsdIdentities): + + schema_class = XMLSchema11 + + def test_ref_definition(self): + self.check_schema(""" + + + + + + + + + + """) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validators/test_schema_class.py b/xmlschema/tests/validators/test_schema_class.py new file mode 100644 index 0000000..fb94fe1 --- /dev/null +++ b/xmlschema/tests/validators/test_schema_class.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. 
+# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import unittest +import platform +import warnings + +from xmlschema import XMLSchemaParseError, XMLSchemaIncludeWarning, XMLSchemaImportWarning +from xmlschema.etree import etree_element +from xmlschema.qnames import XSD_ELEMENT, XSI_TYPE +from xmlschema.tests import SKIP_REMOTE_TESTS, XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestXMLSchema10(XsdValidatorTestCase): + + def test_schema_copy(self): + schema = self.vh_schema.copy() + self.assertNotEqual(id(self.vh_schema), id(schema)) + self.assertNotEqual(id(self.vh_schema.namespaces), id(schema.namespaces)) + self.assertNotEqual(id(self.vh_schema.maps), id(schema.maps)) + + def test_resolve_qname(self): + schema = self.schema_class(""" + + + """) + self.assertEqual(schema.resolve_qname('xs:element'), XSD_ELEMENT) + self.assertEqual(schema.resolve_qname('xsi:type'), XSI_TYPE) + + self.assertEqual(schema.resolve_qname(XSI_TYPE), XSI_TYPE) + self.assertEqual(schema.resolve_qname('element'), 'element') + self.assertRaises(ValueError, schema.resolve_qname, '') + self.assertRaises(ValueError, schema.resolve_qname, 'xsi:a type ') + self.assertRaises(ValueError, schema.resolve_qname, 'xml::lang') + + def test_global_group_definitions(self): + schema = self.check_schema(""" + + + """, validation='lax') + self.assertEqual(len(schema.errors), 1) + + self.check_schema('', XMLSchemaParseError) + self.check_schema('', XMLSchemaParseError) + + def test_wrong_includes_and_imports(self): + + with warnings.catch_warnings(record=True) as context: + warnings.simplefilter("always") + self.check_schema(""" + + + + + + + + """) + self.assertEqual(len(context), 3, "Wrong number of include/import warnings") + self.assertEqual(context[0].category, XMLSchemaIncludeWarning) + self.assertEqual(context[1].category, XMLSchemaIncludeWarning) + self.assertEqual(context[2].category, XMLSchemaImportWarning) + 
self.assertTrue(str(context[0].message).startswith("Include")) + self.assertTrue(str(context[1].message).startswith("Redefine")) + self.assertTrue(str(context[2].message).startswith("Namespace import")) + + def test_wrong_references(self): + # Wrong namespace for element type's reference + self.check_schema(""" + + + + + """, XMLSchemaParseError) + + def test_annotations(self): + schema = self.check_schema(""" + + + """) + self.assertIsNotNone(schema.elements['foo'].annotation) + + schema = self.check_schema(""" + + + stuff + + + + + """) + self.assertIsNotNone(schema.types["Magic"].annotation) + + self.check_schema(""" + + + + + + + """, XMLSchemaParseError) + + def test_base_schemas(self): + from xmlschema.validators.schema import XML_SCHEMA_FILE + self.schema_class(XML_SCHEMA_FILE) + + def test_root_elements(self): + # Test issue #107 fix + schema = self.schema_class(""" + + + + + + + + + """) + + self.assertEqual(set(schema.root_elements), {schema.elements['root1'], schema.elements['root2']}) + + def test_is_restriction_method(self): + # Test issue #111 fix + schema = self.schema_class(source=self.casepath('issues/issue_111/issue_111.xsd')) + extended_header_def = schema.types['extendedHeaderDef'] + self.assertTrue(extended_header_def.is_derived(schema.types['blockDef'])) + + @unittest.skipIf(SKIP_REMOTE_TESTS or platform.system() == 'Windows', + "Remote networks are not accessible or avoid SSL verification error on Windows.") + def test_remote_schemas_loading(self): + col_schema = self.schema_class("https://raw.githubusercontent.com/brunato/xmlschema/master/" + "xmlschema/tests/test_cases/examples/collection/collection.xsd") + self.assertTrue(isinstance(col_schema, self.schema_class)) + vh_schema = self.schema_class("https://raw.githubusercontent.com/brunato/xmlschema/master/" + "xmlschema/tests/test_cases/examples/vehicles/vehicles.xsd") + self.assertTrue(isinstance(vh_schema, self.schema_class)) + + def test_schema_defuse(self): + vh_schema = 
self.schema_class(self.vh_xsd_file, defuse='always') + self.assertIsInstance(vh_schema.root, etree_element) + for schema in vh_schema.maps.iter_schemas(): + self.assertIsInstance(schema.root, etree_element) + + +class TestXMLSchema11(TestXMLSchema10): + + schema_class = XMLSchema11 + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validators/test_simple_types.py b/xmlschema/tests/validators/test_simple_types.py new file mode 100644 index 0000000..0b7be36 --- /dev/null +++ b/xmlschema/tests/validators/test_simple_types.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import unittest + +from xmlschema import XMLSchemaParseError +from xmlschema.qnames import XSD_LIST, XSD_UNION +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11 + + +class TestXsdSimpleTypes(XsdValidatorTestCase): + + def test_simple_types(self): + # Issue #54: set list or union schema element. 
+ xs = self.check_schema(""" + + + + + + + + + """) + xs.types['test_list'].elem = xs.root[0] # elem.tag == 'simpleType' + self.assertEqual(xs.types['test_list'].elem.tag, XSD_LIST) + xs.types['test_union'].elem = xs.root[1] # elem.tag == 'simpleType' + self.assertEqual(xs.types['test_union'].elem.tag, XSD_UNION) + + def test_final_attribute(self): + self.check_schema(""" + + + + """) + + def test_facets(self): + # Issue #55 and a near error (derivation from xs:integer) + self.check_schema(""" + + + + + + + + + + + + + """) + self.check_schema(""" + + + + + + + """, XMLSchemaParseError) + + # Issue #56 + self.check_schema(""" + + + + + + + + + + + """) + + def test_union_restrictions(self): + # Wrong union restriction (not admitted facets, see issue #67) + self.check_schema(r""" + + + + + + + + + + + + + + + """, XMLSchemaParseError) + + def test_date_time_facets(self): + self.check_schema(""" + + + + + + """) + + self.check_schema(""" + + + + + + """) + + +class TestXsd11SimpleTypes(TestXsdSimpleTypes): + + schema_class = XMLSchema11 + + def test_explicit_timezone_facet(self): + schema = self.check_schema(""" + + + + + + + + + + + + + + + + """) + self.assertTrue(schema.types['req-tz-date'].is_valid('2002-10-10-05:00')) + self.assertTrue(schema.types['req-tz-date'].is_valid('2002-10-10Z')) + self.assertFalse(schema.types['req-tz-date'].is_valid('2002-10-10')) + + def test_assertion_facet(self): + self.check_schema(""" + + + + + """, XMLSchemaParseError) + + schema = self.check_schema(""" + + + + + """) + self.assertTrue(schema.types['MeasureType'].is_valid('10')) + self.assertFalse(schema.types['MeasureType'].is_valid('-1.5')) + + self.check_schema(""" + + + + + """, XMLSchemaParseError) + + schema = self.check_schema(""" + + + + + """) + self.assertTrue(schema.types['RestrictedDateTimeType'].is_valid('2000-01-01T12:00:00')) + + schema = self.check_schema(""" + + + + + + """) + self.assertTrue(schema.types['Percentage'].is_valid('10')) + 
self.assertTrue(schema.types['Percentage'].is_valid('100')) + self.assertTrue(schema.types['Percentage'].is_valid('0')) + self.assertFalse(schema.types['Percentage'].is_valid('-1')) + self.assertFalse(schema.types['Percentage'].is_valid('101')) + self.assertFalse(schema.types['Percentage'].is_valid('90.1')) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + unittest.main() diff --git a/xmlschema/tests/validators/test_wildcards.py b/xmlschema/tests/validators/test_wildcards.py new file mode 100644 index 0000000..e8ebce5 --- /dev/null +++ b/xmlschema/tests/validators/test_wildcards.py @@ -0,0 +1,730 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). +# All rights reserved. +# This file is distributed under the terms of the MIT License. +# See the file 'LICENSE' in the root directory of the present +# distribution, or http://opensource.org/licenses/MIT. +# +# @author Davide Brunato +# +from __future__ import print_function, unicode_literals +import unittest + +from xmlschema import XMLSchemaParseError +from xmlschema.tests import XsdValidatorTestCase +from xmlschema.validators import XMLSchema11, XsdDefaultOpenContent + + +class TestXsdWildcards(XsdValidatorTestCase): + + def test_overlap(self): + schema = self.schema_class(""" + + + + + + + + + """) + + any1, any2, any3 = schema.groups['group1'][:] + + self.assertFalse(any1.is_overlap(any2)) + self.assertFalse(any2.is_overlap(any1)) + self.assertTrue(any3.is_matching('{foo}x')) + self.assertTrue(any3.is_matching('{bar}x')) + self.assertTrue(any3.is_matching('{tns1}x')) + + def test_any_wildcard(self): + schema = self.check_schema(""" + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].namespace, ['##other']) + + schema = self.check_schema(""" + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].namespace, ['']) + + schema = 
self.check_schema(""" + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].namespace, ['ns', '']) + + schema = self.check_schema(""" + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].namespace, ['tns2', 'tns1', 'tns3']) + self.assertEqual(schema.types['taggedType'].content_type[-1].min_occurs, 1) + self.assertEqual(schema.types['taggedType'].content_type[-1].max_occurs, 1) + + schema = self.check_schema(""" + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].namespace, ('##any',)) + self.assertEqual(schema.types['taggedType'].content_type[-1].min_occurs, 10) + self.assertIsNone(schema.types['taggedType'].content_type[-1].max_occurs) + + def test_any_attribute_wildcard(self): + schema = self.check_schema(""" + + + + + + + """) + self.assertEqual(schema.types['taggedType'].attributes[None].namespace, ['tns1:foo']) + + schema = self.check_schema(""" + + + + + + + """) + self.assertEqual(schema.types['taggedType'].attributes[None].namespace, ['']) + + def test_namespace_variants(self): + schema = self.schema_class(""" + + + + + + + + """) + + any1 = schema.groups['group1'][0] + self.assertEqual(any1.namespace, ['urn:a']) + any2 = schema.groups['group1'][1] + self.assertEqual(any2.namespace, []) + + +class TestXsd11Wildcards(TestXsdWildcards): + + schema_class = XMLSchema11 + + def test_is_restriction(self): + schema = self.schema_class(""" + + + + + + + + + + + + + + + + + + """) + + any1, any2, any3 = schema.groups['group1'][:3] + + self.assertTrue(any1.is_restriction(any1)) + self.assertFalse(any1.is_restriction(any2)) + self.assertFalse(any1.is_restriction(any3)) + self.assertTrue(any2.is_restriction(any1)) + self.assertTrue(any2.is_restriction(any2)) + self.assertFalse(any2.is_restriction(any3)) + self.assertTrue(any3.is_restriction(any1)) + self.assertTrue(any3.is_restriction(any2)) + self.assertTrue(any3.is_restriction(any3)) + + any1, any2, any3 = schema.groups['group1'][3:6] + 
self.assertTrue(any1.is_restriction(any1)) + self.assertTrue(any2.is_restriction(any1)) + self.assertTrue(any3.is_restriction(any1)) + + any1, any2, any3 = schema.groups['group1'][6:9] + self.assertFalse(any2.is_restriction(any1)) + self.assertTrue(any3.is_restriction(any1)) + + def test_wildcard_union(self): + schema = self.schema_class(""" + + + + + + + + + + + + + """) + + # + any1, any2 = schema.groups['group1'][:2] + self.assertListEqual(any1.namespace, ['tns1']) + any1.union(any2) + self.assertListEqual(any1.namespace, ['tns1', 'tns2']) + + # + any1, any2 = schema.groups['group1'][2:4] + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + any1.union(any2) + self.assertListEqual(any1.not_namespace, ['tns1']) + any2.union(any1) + self.assertListEqual(any2.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][4:6] + any1.union(any2) + self.assertEqual(any1.namespace, ('##any',)) + self.assertEqual(any1.not_namespace, ()) + + # + any1, any2 = schema.groups['group1'][6:8] + any1.union(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][8:10] + any1.union(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][10:12] + any1.union(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['', 'tns1']) + + # + any1, any2 = schema.groups['group1'][12:14] + any1.union(any2) + self.assertListEqual(any1.namespace, ['##any']) + self.assertListEqual(any1.not_namespace, []) + + def test_wildcard_intersection(self): + schema = self.schema_class(""" + + + + + + + + + + + + + + + """) + + # + any1, any2 = schema.groups['group1'][:2] + self.assertListEqual(any1.namespace, ['tns1']) + any1.intersection(any2) + self.assertListEqual(any1.namespace, ['tns1']) + + # + any1, any2 = 
schema.groups['group1'][2:4] + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1']) + any1.intersection(any2) + self.assertListEqual(any1.not_namespace, ['tns1', 'tns2']) + any2.intersection(any1) + self.assertListEqual(any2.not_namespace, ['tns1', 'tns2']) + + # + any1, any2 = schema.groups['group1'][4:6] + any1.intersection(any2) + self.assertEqual(any1.namespace, []) + self.assertEqual(any1.not_namespace, ['tns1']) + + # + any1, any2 = schema.groups['group1'][6:8] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1', '']) + + # + any1, any2 = schema.groups['group1'][8:10] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns1', '']) + + # + any1, any2 = schema.groups['group1'][10:12] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['', 'tns1']) + + # + any1, any2 = schema.groups['group1'][12:14] + any1.intersection(any2) + self.assertListEqual(any1.namespace, []) + self.assertListEqual(any1.not_namespace, ['tns2', 'tns1', '']) + + # + # + any1, any2 = schema.groups['group1'][14:16] + any1.intersection(any2) + self.assertListEqual(any1.namespace, ['']) + self.assertListEqual(any1.not_qname, ['##defined', 'qn1']) + + def test_open_content_mode_interleave(self): + schema = self.check_schema(""" + + + + + + + + + + + + + + """) + self.assertEqual(schema.elements['Book'].type.open_content.mode, 'interleave') + self.assertEqual(schema.elements['Book'].type.open_content.any_element.min_occurs, 0) + self.assertIsNone(schema.elements['Book'].type.open_content.any_element.max_occurs) + + schema = self.check_schema(""" + + + + + + + + + + """) + self.assertEqual(schema.types['name'].open_content.mode, 'interleave') + + self.check_schema(""" + + + + + + + + """, XMLSchemaParseError) + + def test_open_content_mode_suffix(self): + schema = 
self.check_schema(""" + + + + + + + + + + """) + self.assertEqual(schema.types['name'].open_content.mode, 'suffix') + self.assertEqual(schema.types['name'].open_content.any_element.min_occurs, 0) + self.assertIsNone(schema.types['name'].open_content.any_element.max_occurs) + + self.check_schema(""" + + + + + + + + """, XMLSchemaParseError) + + def test_open_content_mode_none(self): + schema = self.check_schema(""" + + + + + + + + """) + self.assertEqual(schema.types['name'].open_content.mode, 'none') + + self.check_schema(""" + + + + + + + + + + """, XMLSchemaParseError) + + def test_open_content_allowed(self): + self.check_schema(""" + + + + + + + + + + """) + + def test_open_content_not_allowed(self): + self.check_schema(""" + + + + + + + + """, XMLSchemaParseError) + + self.check_schema(""" + + + + + + + + """, XMLSchemaParseError) + + with self.assertRaises(XMLSchemaParseError): + self.schema_class(""" + + + + + """) + + def test_open_content_wrong_attributes(self): + self.check_schema(""" + + + + + + + + """, XMLSchemaParseError) + + self.check_schema(""" + + + + + + + + + + """, XMLSchemaParseError) + + self.check_schema(""" + + + + + + + + + + """, XMLSchemaParseError) + + def test_default_open_content(self): + schema = self.schema_class(""" + + + + + """) + self.assertIsInstance(schema.default_open_content, XsdDefaultOpenContent) + self.assertFalse(schema.default_open_content.applies_to_empty) + + schema = self.schema_class(""" + + + + + """) + self.assertTrue(schema.default_open_content.applies_to_empty) + + with self.assertRaises(XMLSchemaParseError): + self.schema_class(""" + + + + + """) + + with self.assertRaises(XMLSchemaParseError): + self.schema_class(""" + + + + + """) + + with self.assertRaises(XMLSchemaParseError): + self.schema_class(""" + + + + + + + + """) + + with self.assertRaises(XMLSchemaParseError): + self.schema_class(""" + + + + + """) + + with self.assertRaises(XMLSchemaParseError): + self.schema_class(""" + + + """) + + def 
test_open_content_restriction(self): + schema = self.check_schema(""" + + + + + + + + + + + + + + + + + + + + + """) + self.assertEqual(schema.types['derivedType'].content_type[0].name, 'foo') + + self.check_schema(""" + + + + + + + + + + + + + + + + + + + + + """, XMLSchemaParseError) + + def test_open_content_extension(self): + schema = self.check_schema(""" + + + + + + + + + + + + + + + + + + + + + """) + self.assertEqual(schema.types['derivedType'].content_type[0][0].name, 'foo') + self.assertEqual(schema.types['derivedType'].content_type[1][0].name, 'bar') + + self.check_schema(""" + + + + + + + + + + + + + + + + + + + + + + """, XMLSchemaParseError) + + def test_not_qname_attribute(self): + self.assertIsInstance(self.schema_class(""" + + + + + + + """), XMLSchema11) + + self.assertIsInstance(self.schema_class(""" + + + + + + + """), XMLSchema11) + + self.check_schema(""" + + + + + + + """, XMLSchemaParseError) + + def test_any_wildcard(self): + super(TestXsd11Wildcards, self).test_any_wildcard() + self.check_schema(""" + + + + + + """, XMLSchemaParseError) + + schema = self.check_schema(""" + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].not_namespace, ['']) + + schema = self.schema_class(""" + + + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].not_qname, ['{tns1}foo', '{tns1}bar']) + + schema = self.schema_class(""" + + + + + + + + """) + self.assertEqual(schema.types['taggedType'].content_type[-1].not_qname, + ['##defined', '{tns1}foo', '##definedSibling']) + + def test_any_attribute_wildcard(self): + super(TestXsd11Wildcards, self).test_any_attribute_wildcard() + schema = self.schema_class(""" + + + + + + + + + """) + self.assertEqual(schema.types['taggedType'].attributes[None].namespace, ('##any',)) + self.assertEqual(schema.types['taggedType'].attributes[None].not_qname, ['{tns1}foo']) + + +if __name__ == '__main__': + from xmlschema.tests import print_test_header + + print_test_header() + 
unittest.main() diff --git a/xmlschema/validators/__init__.py b/xmlschema/validators/__init__.py index 389b05b..a86828c 100644 --- a/xmlschema/validators/__init__.py +++ b/xmlschema/validators/__init__.py @@ -11,24 +11,31 @@ """ XML Schema validators subpackage. """ -from .exceptions import XMLSchemaValidatorError, XMLSchemaParseError, XMLSchemaModelError, \ - XMLSchemaModelDepthError, XMLSchemaValidationError, XMLSchemaDecodeError, XMLSchemaEncodeError, \ - XMLSchemaNotBuiltError, XMLSchemaChildrenValidationError, XMLSchemaIncludeWarning, XMLSchemaImportWarning +from .exceptions import XMLSchemaValidatorError, XMLSchemaParseError, \ + XMLSchemaModelError, XMLSchemaModelDepthError, XMLSchemaValidationError, \ + XMLSchemaDecodeError, XMLSchemaEncodeError, XMLSchemaNotBuiltError, \ + XMLSchemaChildrenValidationError, XMLSchemaIncludeWarning, \ + XMLSchemaImportWarning, XMLSchemaTypeTableWarning from .xsdbase import XsdValidator, XsdComponent, XsdAnnotation, XsdType, ValidationMixin, ParticleMixin from .assertions import XsdAssert from .notations import XsdNotation -from .identities import XsdSelector, XsdFieldSelector, XsdIdentity, XsdKeyref, XsdKey, XsdUnique -from .facets import XsdPatternFacets, XsdEnumerationFacets -from .wildcards import XsdAnyElement, Xsd11AnyElement, XsdAnyAttribute, Xsd11AnyAttribute +from .identities import XsdSelector, XsdFieldSelector, XsdIdentity, XsdKeyref, XsdKey, \ + XsdUnique, Xsd11Keyref, Xsd11Key, Xsd11Unique +from .facets import XsdFacet, XsdWhiteSpaceFacet, XsdLengthFacet, XsdMinLengthFacet, \ + XsdMaxLengthFacet, XsdMinExclusiveFacet, XsdMinInclusiveFacet, XsdMaxExclusiveFacet, \ + XsdMaxInclusiveFacet, XsdFractionDigitsFacet, XsdTotalDigitsFacet, \ + XsdExplicitTimezoneFacet, XsdPatternFacets, XsdEnumerationFacets, XsdAssertionFacet +from .wildcards import XsdAnyElement, Xsd11AnyElement, XsdAnyAttribute, Xsd11AnyAttribute, \ + XsdOpenContent, XsdDefaultOpenContent from .attributes import XsdAttribute, Xsd11Attribute, 
XsdAttributeGroup from .simple_types import xsd_simple_type_factory, XsdSimpleType, XsdAtomic, XsdAtomicBuiltin, \ - XsdAtomicRestriction, Xsd11AtomicRestriction, XsdList, XsdUnion + XsdAtomicRestriction, Xsd11AtomicRestriction, XsdList, XsdUnion, Xsd11Union from .complex_types import XsdComplexType, Xsd11ComplexType from .models import ModelGroup, ModelVisitor from .groups import XsdGroup, Xsd11Group -from .elements import XsdElement, Xsd11Element +from .elements import XsdElement, Xsd11Element, XsdAlternative from .globals_ import XsdGlobals from .schema import XMLSchemaMeta, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11 diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index bb89290..ee7ae19 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -9,10 +9,11 @@ # @author Davide Brunato # from __future__ import unicode_literals -from elementpath import XPath2Parser, XPathContext, XMLSchemaProxy, ElementPathSyntaxError +from elementpath import XPath2Parser, XPathContext, ElementPathError +from elementpath.datatypes import XSD_BUILTIN_TYPES from ..qnames import XSD_ASSERT -from ..xpath import ElementPathMixin +from ..xpath import ElementPathMixin, XMLSchemaProxy from .exceptions import XMLSchemaValidationError from .xsdbase import XsdComponent @@ -20,63 +21,110 @@ from .xsdbase import XsdComponent class XsdAssert(XsdComponent, ElementPathMixin): """ - Class for XSD 'assert' constraint declaration. + Class for XSD *assert* constraint definitions. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = {XSD_ASSERT} + _ADMITTED_TAGS = {XSD_ASSERT} token = None + parser = None + path = 'true()' def __init__(self, elem, schema, parent, base_type): self.base_type = base_type super(XsdAssert, self).__init__(elem, schema, parent) - if not self.base_type.is_complex(): - self.parse_error("base_type={!r} is not a complexType definition", elem=self.elem) - self.path = 'true()' + + def __repr__(self): + return '%s(test=%r)' % (self.__class__.__name__, self.path) def _parse(self): super(XsdAssert, self)._parse() - try: - self.path = self.elem.attrib['test'] - except KeyError as err: - self.parse_error(str(err), elem=self.elem) - self.path = 'true()' + if self.base_type.is_simple(): + self.parse_error("base_type=%r is not a complexType definition" % self.base_type) + else: + try: + self.path = self.elem.attrib['test'].strip() + except KeyError as err: + self.parse_error(str(err), elem=self.elem) if 'xpathDefaultNamespace' in self.elem.attrib: self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - self.parser = XPath2Parser(self.namespaces, strict=False, default_namespace=self.xpath_default_namespace) @property def built(self): - return self.token is not None and (self.base_type.is_global or self.base_type.built) + return self.token is not None and (self.base_type.parent is None or self.base_type.built) + + def parse_xpath_test(self): + if not self.base_type.has_simple_content(): + variables = {'value': XSD_BUILTIN_TYPES['anyType'].value} + else: + try: + builtin_type_name = self.base_type.content_type.primitive_type.local_name + except AttributeError: + variables = {'value': XSD_BUILTIN_TYPES['anySimpleType'].value} + else: + variables = {'value': XSD_BUILTIN_TYPES[builtin_type_name].value} + + self.parser = XPath2Parser( + namespaces=self.namespaces, + variables=variables, + strict=False, + default_namespace=self.xpath_default_namespace, + 
schema=XMLSchemaProxy(self.schema, self) + ) - def parse(self): - self.parser.schema = XMLSchemaProxy(self.schema, self) try: self.token = self.parser.parse(self.path) - except ElementPathSyntaxError as err: + except ElementPathError as err: self.parse_error(err, elem=self.elem) self.token = self.parser.parse('true()') - def __call__(self, elem): - if not self.token.evaluate(XPathContext(root=elem)): - msg = "expression is not true with test path %r." - yield XMLSchemaValidationError(self, obj=elem, reason=msg % self.path) + def __call__(self, elem, value=None, source=None, namespaces=None, **kwargs): + if value is not None: + self.parser.variables['value'] = self.base_type.text_decode(value) + if not self.parser.is_schema_bound(): + self.parser.schema.bind_parser(self.parser) + + if source is None: + context = XPathContext(root=elem) + else: + context = XPathContext(root=source.root, item=elem) + + default_namespace = self.parser.namespaces[''] + if namespaces and '' in namespaces: + self.parser.namespaces[''] = namespaces[''] + + try: + if not self.token.evaluate(context.copy()): + msg = "expression is not true with test path %r." 
+ yield XMLSchemaValidationError(self, obj=elem, reason=msg % self.path) + except ElementPathError as err: + yield XMLSchemaValidationError(self, obj=elem, reason=str(err)) + + self.parser.namespaces[''] = default_namespace # For implementing ElementPathMixin def __iter__(self): if not self.parent.has_simple_content(): - for e in self.parent.content_type.iter_subelements(): + for e in self.parent.content_type.iter_elements(): yield e @property def attrib(self): return self.parent.attributes + + @property + def type(self): + return self.parent + + @property + def xpath_proxy(self): + return XMLSchemaProxy(self.schema, self) diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index 53795d6..78df62d 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -17,9 +17,11 @@ from elementpath.datatypes import AbstractDateTime, Duration from ..compat import MutableMapping, ordered_dict_class from ..exceptions import XMLSchemaAttributeError, XMLSchemaTypeError, XMLSchemaValueError -from ..qnames import XSD_ANY_SIMPLE_TYPE, XSD_SIMPLE_TYPE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, \ - XSD_RESTRICTION, XSD_EXTENSION, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE -from ..helpers import get_namespace, get_qname, get_xsd_form_attribute +from ..qnames import XSD_ANNOTATION, XSD_ANY_SIMPLE_TYPE, XSD_SIMPLE_TYPE, \ + XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_RESTRICTION, XSD_EXTENSION, \ + XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE, \ + get_namespace, get_qname +from ..helpers import get_xsd_form_attribute from ..namespaces import XSI_NAMESPACE from .exceptions import XMLSchemaValidationError @@ -30,29 +32,32 @@ from .wildcards import XsdAnyAttribute class XsdAttribute(XsdComponent, ValidationMixin): """ - Class for XSD 1.0 'attribute' declarations. + Class for XSD 1.0 *attribute* declarations. - - Content: (annotation?, simpleType?) 
- + :ivar type: the XSD simpleType of the attribute. + + .. + Content: (annotation?, simpleType?) + """ - _admitted_tags = {XSD_ATTRIBUTE} - qualified = False + _ADMITTED_TAGS = {XSD_ATTRIBUTE} - def __init__(self, elem, schema, parent, name=None, xsd_type=None): - if xsd_type is not None: - self.type = xsd_type - super(XsdAttribute, self).__init__(elem, schema, parent, name) - self.names = (self.qualified_name,) if self.qualified else (self.qualified_name, self.local_name) + type = None + qualified = False + default = None + fixed = None + + def __init__(self, elem, schema, parent): + super(XsdAttribute, self).__init__(elem, schema, parent) if not hasattr(self, 'type'): raise XMLSchemaAttributeError("undefined 'type' for %r." % self) @@ -70,21 +75,9 @@ class XsdAttribute(XsdComponent, ValidationMixin): def _parse(self): super(XsdAttribute, self)._parse() - elem = self.elem + attrib = self.elem.attrib - try: - form = self.form - except ValueError as err: - self.parse_error(err) - else: - if form is None: - self.qualified = self.schema.attribute_form_default == 'qualified' - elif self.parent is None: - self.parse_error("attribute 'form' not allowed in a global attribute.") - else: - self.qualified = form == 'qualified' - - self.use = elem.get('use') + self.use = attrib.get('use') if self.use is None: self.use = 'optional' elif self.parent is None: @@ -93,11 +86,59 @@ class XsdAttribute(XsdComponent, ValidationMixin): self.parse_error("wrong value %r for 'use' attribute." 
% self.use) self.use = 'optional' - name = elem.get('name') + if 'default' in attrib: + self.default = attrib['default'] + + if 'fixed' in attrib: + self.fixed = attrib['fixed'] + + if self._parse_reference(): + try: + xsd_attribute = self.maps.lookup_attribute(self.name) + except LookupError: + self.parse_error("unknown attribute %r" % self.name) + self.type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) + else: + self.ref = xsd_attribute + self.type = xsd_attribute.type + if xsd_attribute.qualified: + self.qualified = True + + if self.default is None and xsd_attribute.default is not None: + self.default = xsd_attribute.default + + if xsd_attribute.fixed is not None: + if self.fixed is None: + self.fixed = xsd_attribute.fixed + elif xsd_attribute.fixed != self.fixed: + msg = "referenced attribute has a different fixed value %r" + self.parse_error(msg % xsd_attribute.fixed) + + for attribute in ('form', 'type'): + if attribute in self.elem.attrib: + self.parse_error("attribute %r is not allowed when attribute reference is used." 
% attribute) + + child = self._parse_child_component(self.elem) + if child is not None and child.tag == XSD_SIMPLE_TYPE: + self.parse_error("not allowed type definition for XSD attribute reference") + return + + try: + form = get_xsd_form_attribute(self.elem, 'form') + except ValueError as err: + self.parse_error(err) + else: + if form is None: + if self.schema.attribute_form_default == 'qualified': + self.qualified = True + elif self.parent is None: + self.parse_error("attribute 'form' not allowed in a global attribute.") + elif form == 'qualified': + self.qualified = True + + name = attrib.get('name') if name is not None: - if 'ref' in elem.attrib: - self.parse_error("both 'name' and 'ref' in attribute declaration") - elif name == 'xmlns': + if name == 'xmlns': self.parse_error("an attribute name must be different from 'xmlns'") if self.parent is None or self.qualified: @@ -107,66 +148,31 @@ class XsdAttribute(XsdComponent, ValidationMixin): self.name = get_qname(self.target_namespace, name) else: self.name = name - elif self.parent is None: - self.parse_error("missing 'name' in global attribute declaration") - else: + + child = self._parse_child_component(self.elem) + if 'type' in attrib: try: - attribute_qname = self.schema.resolve_qname(elem.attrib['ref']) - except KeyError: - self.parse_error("missing both 'name' and 'ref' in attribute declaration") - self.xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) - return - except ValueError as err: + type_qname = self.schema.resolve_qname(attrib['type']) + except (KeyError, ValueError, RuntimeError) as err: self.parse_error(err) - self.xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) - return + xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) else: try: - xsd_attribute = self.maps.lookup_attribute(attribute_qname) - except LookupError: - self.parse_error("unknown attribute %r" % elem.attrib['ref']) - self.type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) - else: - self.type = xsd_attribute.type - 
self.qualified = xsd_attribute.qualified - if xsd_attribute.fixed is not None and 'fixed' in elem.attrib and \ - elem.get('fixed') != xsd_attribute.fixed: - self.parse_error("referenced attribute has a different fixed value %r" % xsd_attribute.fixed) + xsd_type = self.maps.lookup_type(type_qname) + except LookupError as err: + self.parse_error(err) + xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) - self.name = attribute_qname - for attribute in ('form', 'type'): - if attribute in self.elem.attrib: - self.parse_error("attribute %r is not allowed when attribute reference is used." % attribute) - xsd_declaration = self._parse_component(elem, required=False) - - if xsd_declaration is not None and xsd_declaration.tag == XSD_SIMPLE_TYPE: - self.parse_error("not allowed type declaration for XSD attribute reference") - return - - xsd_declaration = self._parse_component(elem, required=False) - try: - type_qname = self.schema.resolve_qname(elem.attrib['type']) - except ValueError as err: - self.parse_error(err, elem) - xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) - except KeyError: - if xsd_declaration is not None: - # No 'type' attribute in declaration, parse for child local simpleType - xsd_type = self.schema.BUILDERS.simple_type_factory(xsd_declaration, self.schema, self) - else: - # Empty declaration means xsdAnySimpleType - xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) + if child is not None and child.tag == XSD_SIMPLE_TYPE: + self.parse_error("ambiguous type definition for XSD attribute") + elif child is not None: + self.parse_error("not allowed element in XSD attribute declaration: %r" % child[0]) + elif child is not None: + # No 'type' attribute in declaration, parse for child local simpleType + xsd_type = self.schema.BUILDERS.simple_type_factory(child, self.schema, self) else: - try: - xsd_type = self.maps.lookup_type(type_qname) - except LookupError as err: - self.parse_error(err, elem) - xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) 
- - if xsd_declaration is not None and xsd_declaration.tag == XSD_SIMPLE_TYPE: - self.parse_error("ambiguous type declaration for XSD attribute") - elif xsd_declaration: - self.parse_error("not allowed element in XSD attribute declaration: %r" % xsd_declaration[0]) + # Empty declaration means xsdAnySimpleType + xsd_type = self.maps.lookup_type(XSD_ANY_SIMPLE_TYPE) try: self.type = xsd_type @@ -174,46 +180,30 @@ class XsdAttribute(XsdComponent, ValidationMixin): self.parse_error(err) # Check value constraints - if 'default' in elem.attrib: - if 'fixed' in elem.attrib: + if 'default' in attrib: + if 'fixed' in attrib: self.parse_error("'default' and 'fixed' attributes are mutually exclusive") if self.use != 'optional': self.parse_error("the attribute 'use' must be 'optional' if the attribute 'default' is present") - if not self.type.is_valid(elem.attrib['default']): + if not self.type.is_valid(attrib['default']): msg = "'default' value {!r} is not compatible with the type {!r}" - self.parse_error(msg.format(elem.attrib['default'], self.type)) - elif self.type.is_key(): + self.parse_error(msg.format(attrib['default'], self.type)) + elif self.type.is_key() and self.xsd_version == '1.0': self.parse_error("'xs:ID' or a type derived from 'xs:ID' cannot has a 'default'") - elif 'fixed' in elem.attrib: - if not self.type.is_valid(elem.attrib['fixed']): + elif 'fixed' in attrib: + if not self.type.is_valid(attrib['fixed']): msg = "'fixed' value {!r} is not compatible with the type {!r}" - self.parse_error(msg.format(elem.attrib['fixed'], self.type)) - elif self.type.is_key(): + self.parse_error(msg.format(attrib['fixed'], self.type)) + elif self.type.is_key() and self.xsd_version == '1.0': self.parse_error("'xs:ID' or a type derived from 'xs:ID' cannot has a 'default'") @property def built(self): - return self.type.parent is None or self.type.built + return True @property def validation_attempted(self): - if self.built: - return 'full' - else: - return 
self.type.validation_attempted - - # XSD declaration attributes - @property - def ref(self): - return self.elem.get('ref') - - @property - def default(self): - return self.elem.get('default') - - @property - def fixed(self): - return self.elem.get('fixed') + return 'full' @property def form(self): @@ -229,11 +219,23 @@ class XsdAttribute(XsdComponent, ValidationMixin): for obj in self.type.iter_components(xsd_classes): yield obj + def data_value(self, text): + """Returns the decoded data value of the provided text as XPath fn:data().""" + for result in self.iter_decode(text, validation='skip'): + return result + return text + def iter_decode(self, text, validation='lax', **kwargs): if not text and self.default is not None: text = self.default - if self.fixed is not None and text != self.fixed and validation != 'skip': - yield self.validation_error(validation, "value differs from fixed value", text, **kwargs) + + if self.fixed is not None: + if text is None: + text = self.fixed + elif text == self.fixed or validation == 'skip': + pass + elif self.type.text_decode(text) != self.type.text_decode(self.fixed): + yield self.validation_error(validation, "value differs from fixed value", text, **kwargs) for result in self.type.iter_decode(text, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): @@ -262,69 +264,72 @@ class XsdAttribute(XsdComponent, ValidationMixin): class Xsd11Attribute(XsdAttribute): """ - Class for XSD 1.1 'attribute' declarations. + Class for XSD 1.1 *attribute* declarations. - - Content: (annotation?, simpleType?) - + .. + Content: (annotation?, simpleType?) 
+ """ - @property - def inheritable(self): - return self.elem.get('inheritable') in ('0', 'true') + inheritable = False + _target_namespace = None @property def target_namespace(self): - return self.elem.get('targetNamespace', self.schema.target_namespace) + if self._target_namespace is None: + return self.schema.target_namespace + return self._target_namespace def _parse(self): super(Xsd11Attribute, self)._parse() - if not self.elem.get('inheritable') not in {'0', '1', 'false', 'true'}: - self.parse_error("an XML boolean value is required for attribute 'inheritable'") + if self.use == 'prohibited' and 'fixed' in self.elem.attrib: + self.parse_error("attribute 'fixed' with use=prohibited is not allowed in XSD 1.1") + if self._parse_boolean_attribute('inheritable'): + self.inheritable = True self._parse_target_namespace() class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): """ - Class for XSD 'attributeGroup' definitions. + Class for XSD *attributeGroup* definitions. - - Content: (annotation?, ((attribute | attributeGroup)*, anyAttribute?)) - + .. 
+ Content: (annotation?, ((attribute | attributeGroup)*, anyAttribute?)) + """ redefine = None - _admitted_tags = { + _ADMITTED_TAGS = { XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_RESTRICTION, XSD_EXTENSION, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE } - def __init__(self, elem, schema, parent, name=None, derivation=None, base_attributes=None): + def __init__(self, elem, schema, parent, derivation=None, base_attributes=None): self.derivation = derivation self._attribute_group = ordered_dict_class() self.base_attributes = base_attributes - XsdComponent.__init__(self, elem, schema, parent, name) + XsdComponent.__init__(self, elem, schema, parent) def __repr__(self): if self.ref is not None: - return '%s(ref=%r)' % (self.__class__.__name__, self.prefixed_name) + return '%s(ref=%r)' % (self.__class__.__name__, self.name) elif self.name is not None: - return '%s(name=%r)' % (self.__class__.__name__, self.prefixed_name) + return '%s(name=%r)' % (self.__class__.__name__, self.name) elif self: - names = [a if a.name is None else a.prefixed_name for a in self.values()] + names = [a if a.name is None else a.name for a in self.values()] return '%s(%r)' % (self.__class__.__name__, names) else: return '%s()' % self.__class__.__name__ @@ -377,29 +382,36 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): def _parse(self): super(XsdAttributeGroup, self)._parse() elem = self.elem - any_attribute = False + any_attribute = None attribute_group_refs = [] if elem.tag == XSD_ATTRIBUTE_GROUP: if self.parent is not None: return # Skip dummy definitions try: - self.name = get_qname(self.target_namespace, self.elem.attrib['name']) + self.name = get_qname(self.target_namespace, elem.attrib['name']) except KeyError: self.parse_error("an attribute group declaration requires a 'name' attribute.") return + else: + if self.schema.default_attributes == self.name and self.xsd_version > '1.0': + self.schema.default_attributes = self attributes = 
ordered_dict_class() - for child in self._iterparse_components(elem): - if any_attribute: + for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): + if any_attribute is not None: if child.tag == XSD_ANY_ATTRIBUTE: self.parse_error("more anyAttribute declarations in the same attribute group") else: self.parse_error("another declaration after anyAttribute") elif child.tag == XSD_ANY_ATTRIBUTE: - any_attribute = True - attributes.update([(None, XsdAnyAttribute(child, self.schema, self))]) + any_attribute = self.schema.BUILDERS.any_attribute_class(child, self.schema, self) + if None in attributes: + attributes[None] = attributes[None].copy() + attributes[None].intersection(any_attribute) + else: + attributes[None] = any_attribute elif child.tag == XSD_ATTRIBUTE: attribute = self.schema.BUILDERS.attribute_class(child, self.schema, self) @@ -411,11 +423,14 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): elif child.tag == XSD_ATTRIBUTE_GROUP: try: ref = child.attrib['ref'] - attribute_group_qname = self.schema.resolve_qname(ref) - except ValueError as err: - self.parse_error(err, elem) except KeyError: self.parse_error("the attribute 'ref' is required in a local attributeGroup", elem) + continue + + try: + attribute_group_qname = self.schema.resolve_qname(ref) + except (KeyError, ValueError, RuntimeError) as err: + self.parse_error(err, elem) else: if attribute_group_qname in attribute_group_refs: self.parse_error("duplicated attributeGroup %r" % ref) @@ -431,7 +446,7 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): if not any(e.tag == XSD_ATTRIBUTE_GROUP and ref == e.get('ref') for e in self.redefine.elem): self.parse_error("attributeGroup ref=%r is not in the redefined group" % ref) - elif attribute_group_qname == self.name and self.schema.XSD_VERSION == '1.0': + elif attribute_group_qname == self.name and self.xsd_version == '1.0': self.parse_error("Circular attribute groups not allowed in XSD 1.0") 
attribute_group_refs.append(attribute_group_qname) @@ -440,16 +455,20 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): except LookupError: self.parse_error("unknown attribute group %r" % child.attrib['ref'], elem) else: - if isinstance(base_attributes, tuple): + if not isinstance(base_attributes, tuple): + for name, attr in base_attributes.items(): + if name not in attributes: + attributes[name] = attr + elif name is not None: + self.parse_error("multiple declaration for attribute {!r}".format(name)) + else: + attributes[None] = attributes[None].copy() + attributes[None].intersection(attr) + + elif self.xsd_version == '1.0': self.parse_error("Circular reference found between attribute groups " "{!r} and {!r}".format(self.name, attribute_group_qname)) - for name, attr in base_attributes.items(): - if name is not None and name in attributes: - self.parse_error("multiple declaration for attribute {!r}".format(name)) - else: - attributes[name] = attr - elif self.name is not None: self.parse_error("(attribute | attributeGroup) expected, found %r." 
% child) @@ -469,7 +488,7 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): if name is None: if self.derivation == 'extension': try: - attr.extend_namespace(base_attr) + attr.union(base_attr) except ValueError as err: self.parse_error(err) elif not attr.is_restriction(base_attr): @@ -485,7 +504,11 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): attr.type.normalize(attr.fixed) != base_attr.type.normalize(base_attr.fixed): self.parse_error("Attribute %r: derived attribute has a different fixed value" % name) - self._attribute_group.update(self.base_attributes.items()) + if self.redefine is not None: + pass # In case of redefinition do not copy base attributes + else: + self._attribute_group.update(self.base_attributes.items()) + elif self.redefine is not None and not attribute_group_refs: for name, attr in self._attribute_group.items(): if name is None: @@ -514,8 +537,12 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): self.clear() self._attribute_group.update(attributes) + if None in self._attribute_group and None not in attributes and self.derivation == 'restriction': + wildcard = self._attribute_group[None].copy() + wildcard.namespace = wildcard.not_namespace = wildcard.not_qname = () + self._attribute_group[None] = wildcard - if self.schema.XSD_VERSION == '1.0': + if self.xsd_version == '1.0': has_key = False for attr in self._attribute_group.values(): if attr.name is not None and attr.type.is_key(): @@ -528,20 +555,7 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): @property def built(self): - return all([attr.built for attr in self.values()]) - - @property - def validation_attempted(self): - if self.built: - return 'full' - elif any([attr.validation_attempted == 'partial' for attr in self.values()]): - return 'partial' - else: - return 'none' - - @property - def ref(self): - return self.elem.get('ref') + return True def iter_required(self): for k, v in 
self._attribute_group.items(): @@ -575,18 +589,18 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): if not attrs and not self: return - if validation != 'skip' and any(k not in attrs for k in self.iter_required()): - missing_attrs = {k for k in self.iter_required() if k not in attrs} - reason = "missing required attributes: %r" % missing_attrs - yield self.validation_error(validation, reason, attrs, **kwargs) + if validation != 'skip': + for k in filter(lambda x: x not in attrs, self.iter_required()): + reason = "missing required attribute: %r" % k + yield self.validation_error(validation, reason, attrs, **kwargs) use_defaults = kwargs.get('use_defaults', True) - filler = kwargs.get('filler') - additional_attrs = {k: v for k, v in self.iter_predefined(use_defaults) if k not in attrs} + additional_attrs = [(k, v) for k, v in self.iter_predefined(use_defaults) if k not in attrs] if additional_attrs: attrs = {k: v for k, v in attrs.items()} attrs.update(additional_attrs) + filler = kwargs.get('filler') result_list = [] for name, value in attrs.items(): try: @@ -609,6 +623,10 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): reason = "%r attribute not allowed for element." 
% name yield self.validation_error(validation, reason, attrs, **kwargs) continue + else: + if xsd_attribute.use == 'prohibited': + reason = "use of attribute %r is prohibited" % name + yield self.validation_error(validation, reason, attrs, **kwargs) for result in xsd_attribute.iter_decode(value, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): @@ -631,13 +649,16 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin): yield result_list def iter_encode(self, attrs, validation='lax', **kwargs): - if validation != 'skip' and any(k not in attrs for k in self.iter_required()): - missing_attrs = {k for k in self.iter_required() if k not in attrs} - reason = "missing required attributes: %r" % missing_attrs - yield self.validation_error(validation, reason, attrs, **kwargs) + if not attrs and not self: + return + + if validation != 'skip': + for k in filter(lambda x: x not in attrs, self.iter_required()): + reason = "missing required attribute: %r" % k + yield self.validation_error(validation, reason, attrs, **kwargs) use_defaults = kwargs.get('use_defaults', True) - additional_attrs = {k: v for k, v in self.iter_predefined(use_defaults) if k not in attrs} + additional_attrs = [(k, v) for k, v in self.iter_predefined(use_defaults) if k not in attrs] if additional_attrs: attrs = {k: v for k, v in attrs.items()} attrs.update(additional_attrs) diff --git a/xmlschema/validators/builtins.py b/xmlschema/validators/builtins.py index af9e921..682e879 100644 --- a/xmlschema/validators/builtins.py +++ b/xmlschema/validators/builtins.py @@ -25,8 +25,21 @@ from elementpath import datatypes from ..compat import PY3, long_type, unicode_type from ..exceptions import XMLSchemaValueError -from ..qnames import * -from ..etree import etree_element, is_etree_element +from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ + XSD_PATTERN, XSD_WHITE_SPACE, XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, \ + 
XSD_MAX_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_EXPLICIT_TIMEZONE, \ + XSD_STRING, XSD_NORMALIZED_STRING, XSD_NAME, XSD_NCNAME, XSD_QNAME, XSD_TOKEN, \ + XSD_NMTOKEN, XSD_ID, XSD_IDREF, XSD_LANGUAGE, XSD_DECIMAL, XSD_DOUBLE, XSD_FLOAT, \ + XSD_INTEGER, XSD_BYTE, XSD_SHORT, XSD_INT, XSD_LONG, XSD_UNSIGNED_BYTE, \ + XSD_UNSIGNED_SHORT, XSD_UNSIGNED_INT, XSD_UNSIGNED_LONG, XSD_POSITIVE_INTEGER, \ + XSD_NEGATIVE_INTEGER, XSD_NON_NEGATIVE_INTEGER, XSD_NON_POSITIVE_INTEGER, \ + XSD_GDAY, XSD_GMONTH, XSD_GMONTH_DAY, XSD_GYEAR, XSD_GYEAR_MONTH, XSD_TIME, XSD_DATE, \ + XSD_DATETIME, XSD_DATE_TIME_STAMP, XSD_ENTITY, XSD_ANY_URI, XSD_BOOLEAN, \ + XSD_DURATION, XSD_DAY_TIME_DURATION, XSD_YEAR_MONTH_DURATION, XSD_BASE64_BINARY, \ + XSD_HEX_BINARY, XSD_NOTATION_TYPE, XSD_ERROR, XSD_ASSERTION, XSD_SIMPLE_TYPE, \ + XSD_COMPLEX_TYPE, XSD_ANY_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ANY_SIMPLE_TYPE +from ..etree import etree_element +from ..helpers import is_etree_element from .exceptions import XMLSchemaValidationError from .facets import XSD_10_FACETS_BUILDERS, XSD_11_FACETS_BUILDERS from .simple_types import XsdSimpleType, XsdAtomicBuiltin @@ -155,6 +168,10 @@ def base64_binary_validator(x): yield XMLSchemaValidationError(base64_binary_validator, x, "not a base64 encoding: %s." 
% err) +def error_type_validator(x): + yield XMLSchemaValidationError(error_type_validator, x, "not value is allowed for xs:error type.") + + # # XSD builtin decoding functions def boolean_to_python(s): @@ -309,7 +326,7 @@ XSD_COMMON_BUILTIN_TYPES = ( 'python_type': (unicode_type, str), 'base_type': XSD_TOKEN, 'facets': [ - etree_element(XSD_PATTERN, value=r"([a-zA-Z]{2}|[iI]-[a-zA-Z]+|[xX]-[a-zA-Z]{1,8})(-[a-zA-Z]{1,8})*") + etree_element(XSD_PATTERN, value=r"[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*") ] }, # language codes { @@ -516,6 +533,13 @@ XSD_11_BUILTIN_TYPES = XSD_COMMON_BUILTIN_TYPES + ( 'base_type': XSD_DURATION, 'to_python': datatypes.YearMonthDuration.fromstring, }, # PnYnMnDTnHnMnS with day and time equals to 0 + # --- xs:error primitive type (XSD 1.1) --- + { + 'name': XSD_ERROR, + 'python_type': type(None), + 'admitted_facets': (), + 'facets': [error_type_validator], + }, # xs:error has no value space and no lexical space ) diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 94ac7be..e45ff30 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -11,11 +11,11 @@ from __future__ import unicode_literals from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, \ - XSD_ANY_ATTRIBUTE, XSD_ATTRIBUTE, XSD_COMPLEX_CONTENT, XSD_RESTRICTION, XSD_COMPLEX_TYPE, \ - XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT -from ..helpers import get_qname, local_name, get_xml_bool_attribute, get_xsd_derivation_attribute -from ..etree import etree_element +from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENCE, \ + XSD_ALL, XSD_CHOICE, XSD_ANY_ATTRIBUTE, XSD_ATTRIBUTE, XSD_COMPLEX_CONTENT, \ + XSD_RESTRICTION, XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, \ + XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, get_qname, 
local_name +from ..helpers import get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError from .xsdbase import XsdType, ValidationMixin @@ -27,32 +27,35 @@ from .wildcards import XsdOpenContent XSD_MODEL_GROUP_TAGS = {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE} -SEQUENCE_ELEMENT = etree_element(XSD_SEQUENCE) - class XsdComplexType(XsdType, ValidationMixin): """ - Class for XSD 1.0 'complexType' definitions. + Class for XSD 1.0 *complexType* definitions. - - Content: (annotation?, (simpleContent | complexContent | - ((group | all | choice | sequence)?, ((attribute | attributeGroup)*, anyAttribute?)))) - + :var attributes: the attribute group related with the type. + :var content_type: the content type, that can be a model group or a simple type. + :var mixed: if `True` the complex type has mixed content. + + .. + Content: (annotation?, (simpleContent | complexContent | + ((group | all | choice | sequence)?, ((attribute | attributeGroup)*, anyAttribute?)))) + """ + abstract = False mixed = False assertions = () open_content = None - - _admitted_tags = {XSD_COMPLEX_TYPE, XSD_RESTRICTION} _block = None - _derivation = None + + _ADMITTED_TAGS = {XSD_COMPLEX_TYPE, XSD_RESTRICTION} + _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE} @staticmethod def normalize(text): @@ -75,7 +78,7 @@ class XsdComplexType(XsdType, ValidationMixin): def __repr__(self): if self.name is not None: return '%s(name=%r)' % (self.__class__.__name__, self.prefixed_name) - elif not hasattr(self, 'content_type'): + elif not hasattr(self, 'content_type') or not hasattr(self, 'attributes'): return '%s(id=%r)' % (self.__class__.__name__, id(self)) else: return '%s(content=%r, attributes=%r)' % ( @@ -98,11 +101,8 @@ class XsdComplexType(XsdType, ValidationMixin): if elem.tag == XSD_RESTRICTION: return # a local restriction is already parsed by the caller - if 'abstract' in elem.attrib: - try: - self.abstract = 
get_xml_bool_attribute(elem, 'abstract') - except ValueError as err: - self.parse_error(err, elem) + if self._parse_boolean_attribute('abstract'): + self.abstract = True if 'block' in elem.attrib: try: @@ -116,31 +116,27 @@ class XsdComplexType(XsdType, ValidationMixin): except ValueError as err: self.parse_error(err, elem) - if 'mixed' in elem.attrib: - try: - self.mixed = get_xml_bool_attribute(elem, 'mixed') - except ValueError as err: - self.parse_error(err, elem) + if self._parse_boolean_attribute('mixed'): + self.mixed = True try: - self.name = get_qname(self.target_namespace, elem.attrib['name']) + self.name = get_qname(self.target_namespace, self.elem.attrib['name']) except KeyError: self.name = None + if self.parent is None: + self.parse_error("missing attribute 'name' in a global complexType") + self.name = 'nameless_%s' % str(id(self)) else: if self.parent is not None: - self.parse_error("attribute 'name' not allowed for a local complexType", elem) + self.parse_error("attribute 'name' not allowed for a local complexType") + self.name = None - content_elem = self._parse_component(elem, required=False, strict=False) - if content_elem is None or content_elem.tag in \ - {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE}: - # - # complexType with empty content - self.content_type = self.schema.BUILDERS.group_class(SEQUENCE_ELEMENT, self.schema, self) + content_elem = self._parse_child_component(elem, strict=False) + if content_elem is None or content_elem.tag in self._CONTENT_TAIL_TAGS: + self.content_type = self.schema.create_empty_content_group(self) self._parse_content_tail(elem) elif content_elem.tag in {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: - # - # complexType with child elements self.content_type = self.schema.BUILDERS.group_class(content_elem, self.schema, self) self._parse_content_tail(elem) @@ -152,11 +148,11 @@ class XsdComplexType(XsdType, ValidationMixin): if derivation_elem is None: return - self.base_type = 
self._parse_base_type(derivation_elem) + self.base_type = base_type = self._parse_base_type(derivation_elem) if derivation_elem.tag == XSD_RESTRICTION: - self._parse_simple_content_restriction(derivation_elem, self.base_type) + self._parse_simple_content_restriction(derivation_elem, base_type) else: - self._parse_simple_content_extension(derivation_elem, self.base_type) + self._parse_simple_content_extension(derivation_elem, base_type) if content_elem is not elem[-1]: k = 2 if content_elem is not elem[0] else 1 @@ -166,13 +162,24 @@ class XsdComplexType(XsdType, ValidationMixin): # # complexType with complexContent restriction/extension if 'mixed' in content_elem.attrib: - self.mixed = content_elem.attrib['mixed'] in ('true', '1') + mixed = content_elem.attrib['mixed'] in ('true', '1') + if mixed is not self.mixed: + self.mixed = mixed + if 'mixed' in elem.attrib and self.xsd_version == '1.1': + self.parse_error( + "value of 'mixed' attribute in complexType and complexContent must be same" + ) derivation_elem = self._parse_derivation_elem(content_elem) if derivation_elem is None: return base_type = self._parse_base_type(derivation_elem, complex_content=True) + if base_type is not self: + self.base_type = base_type + elif self.redefine: + self.base_type = self.redefine + if derivation_elem.tag == XSD_RESTRICTION: self._parse_complex_content_restriction(derivation_elem, base_type) else: @@ -182,16 +189,11 @@ class XsdComplexType(XsdType, ValidationMixin): k = 2 if content_elem is not elem[0] else 1 self.parse_error("unexpected tag %r after complexContent declaration:" % elem[k].tag, elem) - if base_type is not self: - self.base_type = base_type - elif self.redefine: - self.base_type = self.redefine - - elif content_elem.tag == XSD_OPEN_CONTENT and self.schema.XSD_VERSION != '1.0': + elif content_elem.tag == XSD_OPEN_CONTENT and self.xsd_version > '1.0': self.open_content = XsdOpenContent(content_elem, self.schema, self) if content_elem is elem[-1]: - 
self.content_type = self.schema.BUILDERS.group_class(SEQUENCE_ELEMENT, self.schema, self) + self.content_type = self.schema.create_empty_content_group(self) else: for index, child in enumerate(elem): if content_elem is not child: @@ -199,7 +201,7 @@ class XsdComplexType(XsdType, ValidationMixin): elif elem[index + 1].tag in {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: self.content_type = self.schema.BUILDERS.group_class(elem[index + 1], self.schema, self) else: - self.content_type = self.schema.BUILDERS.group_class(SEQUENCE_ELEMENT, self.schema, self) + self.content_type = self.schema.self.schema.create_empty_content_group(self) break self._parse_content_tail(elem) @@ -220,7 +222,7 @@ class XsdComplexType(XsdType, ValidationMixin): self.attributes = self.schema.BUILDERS.attribute_group_class(elem, self.schema, self, **kwargs) def _parse_derivation_elem(self, elem): - derivation_elem = self._parse_component(elem, required=False) + derivation_elem = self._parse_child_component(elem) if getattr(derivation_elem, 'tag', None) not in (XSD_RESTRICTION, XSD_EXTENSION): self.parse_error("restriction or extension tag expected", derivation_elem) self.content_type = self.schema.create_any_content_group(self) @@ -228,8 +230,8 @@ class XsdComplexType(XsdType, ValidationMixin): return derivation = local_name(derivation_elem.tag) - if self._derivation is None: - self._derivation = derivation == 'extension' + if self.derivation is None: + self.derivation = derivation elif self.redefine is None: raise XMLSchemaValueError("%r is expected to have a redefined/overridden component" % self) @@ -240,11 +242,11 @@ class XsdComplexType(XsdType, ValidationMixin): def _parse_base_type(self, elem, complex_content=False): try: base_qname = self.schema.resolve_qname(elem.attrib['base']) - except KeyError: - self.parse_error("'base' attribute required", elem) - return self.maps.types[XSD_ANY_TYPE] - except ValueError as err: - self.parse_error(err, elem) + except (KeyError, ValueError, 
RuntimeError) as err: + if 'base' not in elem.attrib: + self.parse_error("'base' attribute required", elem) + else: + self.parse_error(err, elem) return self.maps.types[XSD_ANY_TYPE] try: @@ -262,6 +264,11 @@ class XsdComplexType(XsdType, ValidationMixin): elif complex_content and base_type.is_simple(): self.parse_error("a complexType ancestor required: %r" % base_type, elem) return self.maps.types[XSD_ANY_TYPE] + + if base_type.final and elem.tag.rsplit('}', 1)[-1] in base_type.final: + msg = "derivation by %r blocked by attribute 'final' in base type" + self.parse_error(msg % elem.tag.rsplit('}', 1)[-1]) + return base_type def _parse_simple_content_restriction(self, elem, base_type): @@ -289,10 +296,9 @@ class XsdComplexType(XsdType, ValidationMixin): def _parse_simple_content_extension(self, elem, base_type): # simpleContent extension: the base type must be a simpleType or a complexType # with simple content. - child = self._parse_component(elem, required=False, strict=False) - if child is not None and child.tag not in \ - {XSD_ATTRIBUTE_GROUP, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE}: - self.parse_error("unexpected tag %r." % child.tag, child) + child = self._parse_child_component(elem, strict=False) + if child is not None and child.tag not in self._CONTENT_TAIL_TAGS: + self.parse_error('unexpected tag %r' % child.tag, child) if base_type.is_simple(): self.content_type = base_type @@ -314,12 +320,20 @@ class XsdComplexType(XsdType, ValidationMixin): base_type = self.maps.types[XSD_ANY_TYPE] # complexContent restriction: the base type must be a complexType with a complex content. 
- group_elem = self._parse_component(elem, required=False, strict=False) - if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: - content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): + if child.tag == XSD_OPEN_CONTENT and self.xsd_version > '1.0': + self.open_content = XsdOpenContent(child, self.schema, self) + continue + elif child.tag in XSD_MODEL_GROUP_TAGS: + content_type = self.schema.BUILDERS.group_class(child, self.schema, self) + if not base_type.content_type.admits_restriction(content_type.model): + msg = "restriction of an xs:{} with more than one particle with xs:{} is forbidden" + self.parse_error(msg.format(base_type.content_type.model, content_type.model)) + break else: - # Empty content model - content_type = self.schema.BUILDERS.group_class(elem, self.schema, self) + content_type = self.schema.create_empty_content_group(self, base_type.content_type.model) + + content_type.restriction = base_type.content_type if base_type.is_element_only() and content_type.mixed: self.parse_error( @@ -330,9 +344,16 @@ class XsdComplexType(XsdType, ValidationMixin): "derived an empty content from base type that has not empty content.", elem ) - if base_type.name != XSD_ANY_TYPE and not base_type.is_empty() and False: - if not content_type.has_occurs_restriction(base_type.content_type): - self.parse_error("The derived group %r is not a restriction of the base group." 
% elem, elem) + if not self.open_content: + if self.schema.default_open_content: + self.open_content = self.schema.default_open_content + elif getattr(base_type, 'open_content', None): + self.open_content = base_type.open_content + + if self.open_content and content_type and \ + not self.open_content.is_restriction(base_type.open_content): + msg = "{!r} is not a restriction of the base type {!r}" + self.parse_error(msg.format(self.open_content, base_type.open_content)) self.content_type = content_type self._parse_content_tail(elem, derivation='restriction', base_attributes=base_type.attributes) @@ -341,78 +362,87 @@ class XsdComplexType(XsdType, ValidationMixin): if 'extension' in base_type.final: self.parse_error("the base type is not derivable by extension") - group_elem = self._parse_component(elem, required=False, strict=False) - if base_type.is_empty(): - # Empty model extension: don't create a nested group. - if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: - self.content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) - else: - # Empty content model - self.content_type = self.schema.BUILDERS.group_class(elem, self.schema, self) + for group_elem in filter(lambda x: x.tag != XSD_ANNOTATION, elem): + break else: - # Set the content type using a dummy sequence element - sequence_elem = etree_element(XSD_SEQUENCE) - sequence_elem.text = '\n ' - content_type = self.schema.BUILDERS.group_class(sequence_elem, self.schema, self) + group_elem = None - if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: - # Illegal derivation from a simple content. Applies to both XSD 1.0 and XSD 1.1. - # For the detailed rule refer to XSD 1.1 documentation: - # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#sec-cos-ct-extends - if base_type.is_simple() or base_type.has_simple_content(): - self.parse_error("base %r is simple or has a simple content." 
% base_type, elem) - base_type = self.maps.types[XSD_ANY_TYPE] + if base_type.is_empty(): + if not base_type.mixed: + # Empty element-only model extension: don't create a nested group. + if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + self.content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + elif base_type.is_simple() or base_type.has_simple_content(): + self.content_type = self.schema.create_empty_content_group(self) + else: + self.content_type = self.schema.create_empty_content_group( + parent=self, model=base_type.content_type.model + ) + elif base_type.mixed: + # Empty mixed model extension + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(self.schema.create_empty_content_group(self.content_type)) - group = self.schema.BUILDERS.group_class(group_elem, self.schema, self) - if group.model == 'all': - self.parse_error("Cannot extend a complex content with an all model") + if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self.content_type) + if not self.mixed: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." % base_type.mixed, elem) + else: + group = self.schema.create_empty_content_group(self) - content_type.append(base_type.content_type) - content_type.append(group) - sequence_elem.append(base_type.content_type.elem) - sequence_elem.append(group.elem) + self.content_type.append(group) + self.content_type.elem.append(base_type.content_type.elem) + self.content_type.elem.append(group.elem) - # complexContent extension: base type must be a complex type with complex content. - # A dummy sequence group is added if the base type has not empty content model. 
- if base_type.content_type.model == 'all' and base_type.content_type and group \ - and self.schema.XSD_VERSION == '1.0': - self.parse_error("XSD 1.0 does not allow extension of a not empty 'ALL' model group.", elem) + elif group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + # Derivation from a simple content is forbidden if base type is not empty. + if base_type.is_simple() or base_type.has_simple_content(): + self.parse_error("base %r is simple or has a simple content." % base_type, elem) + base_type = self.any_type - if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE: - self.parse_error("base has a different content type (mixed=%r) and the " - "extension group is not empty." % base_type.mixed, elem) + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self) - elif not base_type.is_simple() and not base_type.has_simple_content(): - content_type.append(base_type.content_type) - sequence_elem.append(base_type.content_type.elem) - if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE and self.mixed: - self.parse_error("extended type has a mixed content but the base is element-only", elem) + if group.model == 'all': + self.parse_error("cannot extend a complex content with xs:all") + if base_type.content_type.model == 'all' and group.model == 'sequence': + self.parse_error("xs:sequence cannot extend xs:all") + content_type = self.schema.create_empty_content_group(self) + content_type.append(base_type.content_type) + content_type.append(group) + content_type.elem.append(base_type.content_type.elem) + content_type.elem.append(group.elem) + + if base_type.content_type.model == 'all' and base_type.content_type and group: + self.parse_error("XSD 1.0 does not allow extension of a not empty 'all' model group") + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." 
% base_type.mixed, elem) self.content_type = content_type + elif not base_type.is_simple() and not base_type.has_simple_content(): + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(base_type.content_type) + self.content_type.elem.append(base_type.content_type.elem) + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE and self.mixed: + self.parse_error("extended type has a mixed content but the base is element-only", elem) + else: + self.content_type = self.schema.create_empty_content_group(self) + self._parse_content_tail(elem, derivation='extension', base_attributes=base_type.attributes) + @property + def block(self): + return self.schema.block_default if self._block is None else self._block + @property def built(self): - try: - return self.content_type.built and self.attributes.built and self.mixed in (False, True) - except AttributeError: - return False + return self.content_type.parent is not None or self.content_type.built @property def validation_attempted(self): - if self.built: - return 'full' - elif self.attributes.validation_attempted == 'partial': - return 'partial' - elif self.content_type.validation_attempted == 'partial': - return 'partial' - else: - return 'none' - - @property - def block(self): - return self.schema.block_default if self._block is None else self._block + return 'full' if self.built else self.content_type.validation_attempted @staticmethod def is_simple(): @@ -456,23 +486,25 @@ class XsdComplexType(XsdType, ValidationMixin): def is_list(self): return self.has_simple_content() and self.content_type.is_list() - def is_valid(self, source, use_defaults=True): + def is_valid(self, source, use_defaults=True, namespaces=None): if hasattr(source, 'tag'): - return super(XsdComplexType, self).is_valid(source, use_defaults) + return super(XsdComplexType, self).is_valid(source, use_defaults, namespaces) elif isinstance(self.content_type, XsdSimpleType): - return 
self.content_type.is_valid(source) + return self.content_type.is_valid(source, use_defaults, namespaces) else: - return self.base_type is not None and self.base_type.is_valid(source) or self.mixed + return self.mixed or self.base_type is not None and \ + self.base_type.is_valid(source, use_defaults, namespaces) def is_derived(self, other, derivation=None): + if derivation and derivation == self.derivation: + derivation = None # derivation mode checked + if self is other: - return True - elif derivation and self.derivation and derivation != self.derivation and other.is_complex(): - return False + return derivation is None elif other.name == XSD_ANY_TYPE: return True elif self.base_type is other: - return True + return derivation is None or self.base_type.derivation == derivation elif hasattr(other, 'member_types'): return any(self.is_derived(m, derivation) for m in other.member_types) elif self.base_type is None: @@ -497,7 +529,7 @@ class XsdComplexType(XsdType, ValidationMixin): for obj in self.base_type.iter_components(xsd_classes): yield obj - for obj in self.assertions: + for obj in filter(lambda x: x.base_type is self, self.assertions): if xsd_classes is None or isinstance(obj, xsd_classes): yield obj @@ -511,15 +543,17 @@ class XsdComplexType(XsdType, ValidationMixin): else: return self.has_simple_content() or self.mixed and self.is_emptiable() - @property - def derivation(self): - return 'extension' if self._derivation else 'restriction' if self._derivation is False else None - def has_restriction(self): - return self._derivation is False + return self.derivation == 'restriction' def has_extension(self): - return self._derivation is True + return self.derivation == 'extension' + + def text_decode(self, text): + if self.has_simple_content(): + return self.content_type.decode(text, validation='skip') + else: + return text def decode(self, data, *args, **kwargs): if hasattr(data, 'attrib') or self.is_simple(): @@ -541,7 +575,7 @@ class XsdComplexType(XsdType, 
ValidationMixin): """ # XSD 1.1 assertions for assertion in self.assertions: - for error in assertion(elem): + for error in assertion(elem, **kwargs): yield self.validation_error(validation, error, **kwargs) for result in self.attributes.iter_decode(elem.attrib, validation, **kwargs): @@ -614,46 +648,188 @@ class XsdComplexType(XsdType, ValidationMixin): class Xsd11ComplexType(XsdComplexType): """ - Class for XSD 1.1 'complexType' definitions. + Class for XSD 1.1 *complexType* definitions. - - Content: (annotation?, (simpleContent | complexContent | (openContent?, - (group | all | choice | sequence)?, ((attribute | attributeGroup)*, anyAttribute?), assert*))) - + .. + Content: (annotation?, (simpleContent | complexContent | (openContent?, + (group | all | choice | sequence)?, ((attribute | attributeGroup)*, anyAttribute?), assert*))) + """ + default_attributes_apply = True + + _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE_GROUP, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE, XSD_ASSERT} + def _parse(self): super(Xsd11ComplexType, self)._parse() + if self.base_type and self.base_type.base_type is self.any_simple_type and \ + self.base_type.derivation == 'extension' and not self.attributes: + # Derivation from xs:anySimpleType with missing variety. 
+ # See: http://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition_details + msg = "the simple content of {!r} is not a valid simple type in XSD 1.1" + self.parse_error(msg.format(self.base_type)) + + # Add open content to complex content type + if isinstance(self.content_type, XsdGroup): + open_content = self.open_content or self.schema.default_open_content + if open_content is None: + pass + elif open_content.mode == 'interleave': + self.content_type.interleave = self.content_type.suffix = open_content.any_element + elif open_content.mode == 'suffix': + self.content_type.suffix = open_content.any_element + # Add inheritable attributes if hasattr(self.base_type, 'attributes'): for name, attr in self.base_type.attributes.items(): - if name and attr.inheritable: + if attr.inheritable: if name not in self.attributes: self.attributes[name] = attr elif not self.attributes[name].inheritable: self.parse_error("attribute %r must be inheritable") + if 'defaultAttributesApply' in self.elem.attrib: + if self.elem.attrib['defaultAttributesApply'].strip() in {'false', '0'}: + self.default_attributes_apply = False + # Add default attributes - if isinstance(self.schema.default_attributes, XsdAttributeGroup) and self.default_attributes_apply: + if self.redefine is None: + default_attributes = self.schema.default_attributes + else: + default_attributes = self.redefine.schema.default_attributes + + if default_attributes is None: + pass + elif self.default_attributes_apply and not self.is_override(): + if self.redefine is None and any(k in self.attributes for k in default_attributes): + self.parse_error("at least a default attribute is already declared in the complex type") self.attributes.update( - (k, v) for k, v in self.schema.default_attributes.items() if k not in self.attributes + (k, v) for k, v in default_attributes.items() if k not in self.attributes ) + def _parse_complex_content_extension(self, elem, base_type): + # Complex content extension with simple base is forbidden 
XSD 1.1. + # For the detailed rule refer to XSD 1.1 documentation: + # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#sec-cos-ct-extends + if base_type.is_simple() or base_type.has_simple_content(): + self.parse_error("base %r is simple or has a simple content." % base_type, elem) + base_type = self.any_type + + if 'extension' in base_type.final: + self.parse_error("the base type is not derivable by extension") + + # Parse openContent + for group_elem in filter(lambda x: x.tag != XSD_ANNOTATION, elem): + if group_elem.tag != XSD_OPEN_CONTENT: + break + self.open_content = XsdOpenContent(group_elem, self.schema, self) + try: + self.open_content.any_element.union(base_type.open_content.any_element) + except AttributeError: + pass + else: + group_elem = None + + if not self.open_content: + if self.schema.default_open_content: + self.open_content = self.schema.default_open_content + elif getattr(base_type, 'open_content', None): + self.open_content = base_type.open_content + + try: + if self.open_content and not base_type.open_content.is_restriction(self.open_content): + msg = "{!r} is not an extension of the base type {!r}" + self.parse_error(msg.format(self.open_content, base_type.open_content)) + except AttributeError: + pass + + if not base_type.content_type: + if not base_type.mixed: + # Empty element-only model extension: don't create a nested sequence group. 
+ if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + self.content_type = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + else: + self.content_type = self.schema.create_empty_content_group( + parent=self, model=base_type.content_type.model + ) + elif base_type.mixed: + # Empty mixed model extension + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(self.schema.create_empty_content_group(self.content_type)) + + if group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self.content_type) + if not self.mixed: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." % base_type.mixed, elem) + if group.model == 'all': + self.parse_error("cannot extend an empty mixed content with an xs:all") + else: + group = self.schema.create_empty_content_group(self) + + self.content_type.append(group) + self.content_type.elem.append(base_type.content_type.elem) + self.content_type.elem.append(group.elem) + + elif group_elem is not None and group_elem.tag in XSD_MODEL_GROUP_TAGS: + group = self.schema.BUILDERS.group_class(group_elem, self.schema, self) + + if base_type.content_type.model != 'all': + content_type = self.schema.create_empty_content_group(self) + content_type.append(base_type.content_type) + content_type.elem.append(base_type.content_type.elem) + + if group.model == 'all': + msg = "xs:all cannot extend a not empty xs:%s" + self.parse_error(msg % base_type.content_type.model) + else: + content_type.append(group) + content_type.elem.append(group.elem) + else: + content_type = self.schema.create_empty_content_group(self, model='all') + content_type.extend(base_type.content_type) + content_type.elem.extend(base_type.content_type.elem) + + if not group: + pass + elif group.model != 'all': + self.parse_error("cannot extend a not empty 'all' model group with a 
different model") + elif base_type.content_type.min_occurs != group.min_occurs: + self.parse_error("when extend an xs:all group minOccurs must be the same") + elif base_type.mixed and not base_type.content_type: + self.parse_error("cannot extend an xs:all group with mixed empty content") + else: + content_type.extend(group) + content_type.elem.extend(group.elem) + + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE: + self.parse_error("base has a different content type (mixed=%r) and the " + "extension group is not empty." % base_type.mixed, elem) + + self.content_type = content_type + + elif not base_type.is_simple() and not base_type.has_simple_content(): + self.content_type = self.schema.create_empty_content_group(self) + self.content_type.append(base_type.content_type) + self.content_type.elem.append(base_type.content_type.elem) + if base_type.mixed != self.mixed and base_type.name != XSD_ANY_TYPE and self.mixed: + self.parse_error("extended type has a mixed content but the base is element-only", elem) + else: + self.content_type = self.schema.create_empty_content_group(self) + + self._parse_content_tail(elem, derivation='extension', base_attributes=base_type.attributes) + def _parse_content_tail(self, elem, **kwargs): self.attributes = self.schema.BUILDERS.attribute_group_class(elem, self.schema, self, **kwargs) - self.assertions = [] - for child in self._iterparse_components(elem): - if child.tag == XSD_ASSERT: - self.assertions.append(XsdAssert(child, self.schema, self, self)) - @property - def default_attributes_apply(self): - return get_xml_bool_attribute(self.elem, 'defaultAttributesApply', default=True) + self.assertions = [XsdAssert(e, self.schema, self, self) for e in elem if e.tag == XSD_ASSERT] + if getattr(self.base_type, 'assertions', None): + self.assertions.extend(assertion for assertion in self.base_type.assertions) diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index ad38660..a5fdc3f 100644 
--- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -12,24 +12,25 @@ This module contains classes for XML Schema elements, complex types and model groups. """ from __future__ import unicode_literals +import warnings from decimal import Decimal -from elementpath import XPath2Parser, ElementPathSyntaxError, XPathContext -from elementpath.xpath_helpers import boolean_value +from elementpath import XPath2Parser, ElementPathError, XPathContext from elementpath.datatypes import AbstractDateTime, Duration from ..exceptions import XMLSchemaAttributeError -from ..qnames import XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE_GROUP, \ - XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, \ - XSD_KEY, XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ID -from ..helpers import get_qname, get_xml_bool_attribute, get_xsd_derivation_attribute, \ - get_xsd_form_attribute, ParticleCounter +from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ + XSD_CHOICE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, \ + XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, XSD_KEY, \ + XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ID, XSD_ERROR, get_qname from ..etree import etree_element +from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute, \ + ParticleCounter, strictly_equal from ..converters import ElementData, raw_xml_encode, XMLSchemaConverter -from ..xpath import ElementPathMixin +from ..xpath import XMLSchemaProxy, ElementPathMixin -from .exceptions import XMLSchemaValidationError +from .exceptions import XMLSchemaValidationError, XMLSchemaTypeTableWarning from .xsdbase import XsdComponent, XsdType, ValidationMixin, ParticleMixin -from .identities import XsdUnique, XsdKey, XsdKeyref +from .identities import XsdKeyref from .wildcards import XsdAnyElement @@ -39,41 +40,52 @@ XSD_ATTRIBUTE_GROUP_ELEMENT = etree_element(XSD_ATTRIBUTE_GROUP) class XsdElement(XsdComponent, ValidationMixin, 
ParticleMixin, ElementPathMixin): """ - Class for XSD 1.0 'element' declarations. + Class for XSD 1.0 *element* declarations. - - Content: (annotation?, ((simpleType | complexType)?, (unique | key | keyref)*)) - + :ivar type: the XSD simpleType or complexType of the element. + :ivar attributes: the group of the attributes associated with the element. + + .. + Content: (annotation?, ((simpleType | complexType)?, (unique | key | keyref)*)) + """ - _admitted_tags = {XSD_ELEMENT} + type = None qualified = False - _ref = None + alternatives = () + inheritable = () + + _ADMITTED_TAGS = {XSD_ELEMENT} _abstract = False _block = None _final = None + _form = None + _nillable = False _substitution_group = None - def __init__(self, elem, schema, parent, name=None): - super(XsdElement, self).__init__(elem, schema, parent, name) - self.names = (self.qualified_name,) if self.qualified else (self.qualified_name, self.local_name) - if not hasattr(self, 'type'): + def __init__(self, elem, schema, parent): + super(XsdElement, self).__init__(elem, schema, parent) + if self.qualified or self.ref is not None or 'targetNamespace' in elem.attrib: + self.names = (self.qualified_name,) + else: + self.names = (self.qualified_name, self.local_name) + if self.type is None: raise XMLSchemaAttributeError("undefined 'type' attribute for %r." % self) - if not hasattr(self, 'qualified'): + if self.qualified is None: raise XMLSchemaAttributeError("undefined 'qualified' attribute for %r." % self) def __repr__(self): @@ -84,141 +96,127 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def __setattr__(self, name, value): if name == "type": - assert value is None or isinstance(value, XsdType), "Wrong value %r for attribute 'type'." 
% value - if hasattr(value, 'attributes'): + assert value is None or isinstance(value, XsdType) + try: self.attributes = value.attributes - else: - self.attributes = self.schema.BUILDERS.attribute_group_class( - XSD_ATTRIBUTE_GROUP_ELEMENT, self.schema, self - ) + except AttributeError: + self.attributes = self.schema.create_empty_attribute_group(self) super(XsdElement, self).__setattr__(name, value) def __iter__(self): if not self.type.has_simple_content(): - for e in self.type.content_type.iter_subelements(): + for e in self.type.content_type.iter_elements(): yield e + @property + def xpath_proxy(self): + return XMLSchemaProxy(self.schema, self) + def _parse(self): XsdComponent._parse(self) self._parse_attributes() index = self._parse_type() self._parse_identity_constraints(index) - if self.parent is None: - self._parse_substitution_group() + if self.parent is None and 'substitutionGroup' in self.elem.attrib: + self._parse_substitution_group(self.elem.attrib['substitutionGroup']) def _parse_attributes(self): - elem = self.elem - attrib = elem.attrib - self._parse_particle(elem) + self._parse_particle(self.elem) - try: - self.qualified = (self.form or self.schema.element_form_default) == 'qualified' - except ValueError as err: - self.parse_error(err) - - name = elem.get('name') - if name is not None: - if self.parent is None or self.qualified: - self.name = get_qname(self.target_namespace, attrib['name']) - else: - self.name = attrib['name'] - elif self.parent is None: - self.parse_error("missing 'name' in a global element declaration") - self.name = elem.get('ref', 'nameless_%s' % str(id(self))) - elif 'ref' not in attrib: - self.parse_error("missing both 'name' and 'ref' attributes") - self.name = elem.get('nameless_%s' % str(id(self))) - else: + attrib = self.elem.attrib + if self._parse_reference(): try: - element_name = self.schema.resolve_qname(attrib['ref']) - except ValueError as err: - self.parse_error(err) + xsd_element = 
self.maps.lookup_element(self.name) + except KeyError: + self.parse_error('unknown element %r' % self.name) self.type = self.maps.types[XSD_ANY_TYPE] - self.name = elem.get('nameless_%s' % str(id(self))) else: - if not element_name: - self.parse_error("empty 'ref' attribute") - self.type = self.maps.types[XSD_ANY_TYPE] - self.name = elem.get('nameless_%s' % str(id(self))) - else: - try: - xsd_element = self.maps.lookup_element(element_name) - except KeyError: - self.parse_error('unknown element %r' % element_name) - self.name = element_name - self.type = self.maps.types[XSD_ANY_TYPE] - else: - self._ref = xsd_element - self.name = xsd_element.name - self.type = xsd_element.type - self.qualified = xsd_element.qualified + self.ref = xsd_element + self.type = xsd_element.type + self.qualified = xsd_element.qualified - for attr_name in ('name', 'type', 'nillable', 'default', 'fixed', 'form', + for attr_name in ('type', 'nillable', 'default', 'fixed', 'form', 'block', 'abstract', 'final', 'substitutionGroup'): if attr_name in attrib: self.parse_error("attribute %r is not allowed when element reference is used." 
% attr_name) return + if 'form' in attrib: + try: + self._form = get_xsd_form_attribute(self.elem, 'form') + except ValueError as err: + self.parse_error(err) + + if (self.form or self.schema.element_form_default) == 'qualified': + self.qualified = True + + try: + if self.parent is None or self.qualified: + self.name = get_qname(self.target_namespace, attrib['name']) + else: + self.name = attrib['name'] + except KeyError: + pass + if 'default' in attrib and 'fixed' in attrib: self.parse_error("'default' and 'fixed' attributes are mutually exclusive.") - if 'abstract' in elem.attrib: - try: - self._abstract = get_xml_bool_attribute(elem, 'abstract') - except ValueError as err: - self.parse_error(err, elem) - else: - if self.parent is not None: - self.parse_error("local scope elements cannot have abstract attribute") + if 'abstract' in attrib: + if self.parent is not None: + self.parse_error("local scope elements cannot have abstract attribute") + if self._parse_boolean_attribute('abstract'): + self._abstract = True - if 'block' in elem.attrib: + if 'block' in attrib: try: self._block = get_xsd_derivation_attribute( - elem, 'block', ('extension', 'restriction', 'substitution') + self.elem, 'block', ('extension', 'restriction', 'substitution') ) except ValueError as err: - self.parse_error(err, elem) + self.parse_error(err) + + if self._parse_boolean_attribute('nillable'): + self._nillable = True if self.parent is None: - self._parse_properties('nillable') - - if 'final' in elem.attrib: + if 'final' in attrib: try: - self._final = get_xsd_derivation_attribute(elem, 'final', ('extension', 'restriction')) + self._final = get_xsd_derivation_attribute(self.elem, 'final', ('extension', 'restriction')) except ValueError as err: - self.parse_error(err, elem) + self.parse_error(err) for attr_name in ('ref', 'form', 'minOccurs', 'maxOccurs'): if attr_name in attrib: self.parse_error("attribute %r not allowed in a global element declaration" % attr_name) else: - 
self._parse_properties('form', 'nillable') - for attr_name in ('final', 'substitutionGroup'): if attr_name in attrib: self.parse_error("attribute %r not allowed in a local element declaration" % attr_name) def _parse_type(self): attrib = self.elem.attrib - if self.ref: - if self._parse_component(self.elem, required=False, strict=False) is not None: + if self.ref is not None: + if self._parse_child_component(self.elem, strict=False) is not None: self.parse_error("element reference declaration can't has children.") elif 'type' in attrib: try: - self.type = self.maps.lookup_type(self.schema.resolve_qname(attrib['type'])) - except KeyError: - self.parse_error('unknown type %r' % attrib['type']) - self.type = self.maps.types[XSD_ANY_TYPE] - except ValueError as err: + type_qname = self.schema.resolve_qname(attrib['type']) + except (KeyError, ValueError, RuntimeError) as err: self.parse_error(err) self.type = self.maps.types[XSD_ANY_TYPE] + else: + try: + self.type = self.maps.lookup_type(type_qname) + except KeyError: + self.parse_error('unknown type %r' % attrib['type']) + self.type = self.maps.types[XSD_ANY_TYPE] finally: - child = self._parse_component(self.elem, required=False, strict=False) + child = self._parse_child_component(self.elem, strict=False) if child is not None and child.tag in (XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE): msg = "the attribute 'type' and the <%s> local declaration are mutually exclusive" self.parse_error(msg % child.tag.split('}')[-1]) else: - child = self._parse_component(self.elem, required=False, strict=False) + child = self._parse_child_component(self.elem, strict=False) if child is not None: if child.tag == XSD_COMPLEX_TYPE: self.type = self.schema.BUILDERS.complex_type_class(child, self.schema, self) @@ -246,47 +244,53 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) if not self.type.is_valid(attrib['default']): msg = "'default' value {!r} is not compatible with the type {!r}" 
self.parse_error(msg.format(attrib['default'], self.type)) - elif self.schema.XSD_VERSION == '1.0' and ( + elif self.xsd_version == '1.0' and ( self.type.name == XSD_ID or self.type.is_derived(self.schema.meta_schema.types['ID'])): self.parse_error("'xs:ID' or a type derived from 'xs:ID' cannot has a 'default'") elif 'fixed' in attrib: if not self.type.is_valid(attrib['fixed']): msg = "'fixed' value {!r} is not compatible with the type {!r}" self.parse_error(msg.format(attrib['fixed'], self.type)) - elif self.schema.XSD_VERSION == '1.0' and ( + elif self.xsd_version == '1.0' and ( self.type.name == XSD_ID or self.type.is_derived(self.schema.meta_schema.types['ID'])): self.parse_error("'xs:ID' or a type derived from 'xs:ID' cannot has a 'default'") return 0 def _parse_identity_constraints(self, index=0): - self.constraints = {} - for child in self._iterparse_components(self.elem, start=index): + if self.ref is not None: + self.identities = self.ref.identities + return + + self.identities = {} + for child in filter(lambda x: x.tag != XSD_ANNOTATION, self.elem[index:]): if child.tag == XSD_UNIQUE: - constraint = XsdUnique(child, self.schema, self) + constraint = self.schema.BUILDERS.unique_class(child, self.schema, self) elif child.tag == XSD_KEY: - constraint = XsdKey(child, self.schema, self) + constraint = self.schema.BUILDERS.key_class(child, self.schema, self) elif child.tag == XSD_KEYREF: - constraint = XsdKeyref(child, self.schema, self) + constraint = self.schema.BUILDERS.keyref_class(child, self.schema, self) else: continue # Error already caught by validation against the meta-schema + if constraint.ref: + if constraint.name in self.identities: + self.parse_error("duplicated identity constraint %r:" % constraint.name, child) + self.identities[constraint.name] = constraint + continue + try: - if child != self.maps.constraints[constraint.name]: + if child != self.maps.identities[constraint.name]: self.parse_error("duplicated identity constraint %r:" % 
constraint.name, child) except KeyError: - self.maps.constraints[constraint.name] = constraint + self.maps.identities[constraint.name] = constraint finally: - self.constraints[constraint.name] = constraint - - def _parse_substitution_group(self): - substitution_group = self.elem.get('substitutionGroup') - if substitution_group is None: - return + self.identities[constraint.name] = constraint + def _parse_substitution_group(self, substitution_group): try: substitution_group_qname = self.schema.resolve_qname(substitution_group) - except ValueError as err: + except (KeyError, ValueError, RuntimeError) as err: self.parse_error(err) return else: @@ -333,63 +337,75 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) @property def built(self): - return self.type.parent is None or self.type.built + return (self.type.parent is None or self.type.built) and \ + all(c.built for c in self.identities.values()) @property def validation_attempted(self): if self.built: return 'full' + elif self.type.validation_attempted == 'partial': + return 'partial' + elif any(c.validation_attempted == 'partial' for c in self.identities.values()): + return 'partial' else: - return self.type.validation_attempted - - # XSD declaration attributes - @property - def ref(self): - return self.elem.get('ref') + return 'none' # Global element's exclusive properties @property def abstract(self): - return self._abstract if self._ref is None else self._ref.abstract + return self._abstract if self.ref is None else self.ref.abstract @property def final(self): - return self._final or self.schema.final_default if self._ref is None else self._ref.final + if self.ref is not None: + return self.ref.final + elif self._final is not None: + return self._final + return self.schema.final_default @property def block(self): - return self._block or self.schema.block_default if self._ref is None else self._ref.block - - @property - def substitution_group(self): - return 
self._substitution_group if self._ref is None else self._ref.substitution_group - - @property - def default(self): - return self.elem.get('default') if self._ref is None else self._ref.default - - @property - def fixed(self): - return self.elem.get('fixed') if self._ref is None else self._ref.fixed - - @property - def form(self): - return get_xsd_form_attribute(self.elem, 'form') if self._ref is None else self._ref.form + if self.ref is not None: + return self.ref.block + elif self._block is not None: + return self._block + return self.schema.block_default @property def nillable(self): - if self._ref is not None: - return self._ref.nillable - return get_xml_bool_attribute(self.elem, 'nillable', default=False) + return self._nillable if self.ref is None else self.ref.nillable + + @property + def substitution_group(self): + return self._substitution_group if self.ref is None else self.ref.substitution_group + + @property + def default(self): + return self.elem.get('default') if self.ref is None else self.ref.default + + @property + def fixed(self): + return self.elem.get('fixed') if self.ref is None else self.ref.fixed + + @property + def form(self): + return self._form if self.ref is None else self.ref.form def get_attribute(self, name): if name[0] != '{': return self.type.attributes[get_qname(self.type.target_namespace, name)] return self.type.attributes[name] - def get_type(self, elem): + def get_type(self, elem, inherited=None): return self.type + def get_attributes(self, xsd_type): + try: + return xsd_type.attributes + except AttributeError: + return self.attributes + def get_path(self, ancestor=None, reverse=False): """ Returns the XPath expression of the element. 
The path is relative to the schema instance @@ -414,12 +430,12 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def iter_components(self, xsd_classes=None): if xsd_classes is None: yield self - for obj in self.constraints.values(): + for obj in self.identities.values(): yield obj else: if isinstance(self, xsd_classes): yield self - for obj in self.constraints.values(): + for obj in self.identities.values(): if isinstance(obj, xsd_classes): yield obj @@ -429,66 +445,92 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def iter_substitutes(self): for xsd_element in self.maps.substitution_groups.get(self.name, ()): - yield xsd_element + if not xsd_element.abstract: + yield xsd_element for e in xsd_element.iter_substitutes(): - yield e + if not e.abstract: + yield e - def iter_decode(self, elem, validation='lax', **kwargs): + def data_value(self, elem): + """Returns the decoded data value of the provided element as XPath fn:data().""" + text = elem.text + if text is None: + text = self.fixed if self.fixed is not None else self.default + return self.type.text_decode(text) + + def iter_decode(self, elem, validation='lax', converter=None, level=0, **kwargs): """ Creates an iterator for decoding an Element instance. :param elem: the Element that has to be decoded. :param validation: the validation mode, can be 'lax', 'strict' or 'skip. + :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the decoding. + :param level: the depth of the element in the tree structure. :param kwargs: keyword arguments for the decoding process. :return: yields a decoded object, eventually preceded by a sequence of \ validation or decoding errors. 
""" - converter = kwargs.get('converter') - if not isinstance(converter, XMLSchemaConverter): - converter = kwargs['converter'] = self.schema.get_converter(converter, **kwargs) + if self.abstract: + yield self.validation_error(validation, "cannot use an abstract element for validation", elem, **kwargs) - level = kwargs.pop('level', 0) - use_defaults = kwargs.get('use_defaults', False) + if not isinstance(converter, XMLSchemaConverter): + converter = self.schema.get_converter(converter, level=level, **kwargs) + inherited = kwargs.get('inherited') value = content = attributes = None - # Get the instance type: xsi:type or the schema's declaration - if XSI_TYPE not in elem.attrib: - xsd_type = self.get_type(elem) - else: - xsi_type = elem.attrib[XSI_TYPE] + # Get the instance effective type + xsd_type = self.get_type(elem, inherited) + if XSI_TYPE in elem.attrib: + type_name = elem.attrib[XSI_TYPE].strip() try: - xsd_type = self.maps.lookup_type(converter.unmap_qname(xsi_type)) - except KeyError: - yield self.validation_error(validation, "unknown type %r" % xsi_type, elem, **kwargs) - xsd_type = self.get_type(elem) + xsd_type = self.maps.get_instance_type(type_name, xsd_type, converter) + except (KeyError, TypeError) as err: + yield self.validation_error(validation, err, elem, **kwargs) + + if xsd_type.is_blocked(self): + yield self.validation_error(validation, "usage of %r is blocked" % xsd_type, elem, **kwargs) # Decode attributes - attribute_group = getattr(xsd_type, 'attributes', self.attributes) - for result in attribute_group.iter_decode(elem.attrib, validation, **kwargs): + attribute_group = self.get_attributes(xsd_type) + for result in attribute_group.iter_decode(elem.attrib, validation, level=level, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) else: attributes = result + if self.inheritable and any(name in self.inheritable for name in elem.attrib): + if inherited: + inherited = 
inherited.copy() + inherited.update((k, v) for k, v in elem.attrib.items() if k in self.inheritable) + else: + inherited = {k: v for k, v in elem.attrib.items() if k in self.inheritable} + kwargs['inherited'] = inherited + # Checks the xsi:nil attribute of the instance - if validation != 'skip' and XSI_NIL in elem.attrib: + if XSI_NIL in elem.attrib: + xsi_nil = elem.attrib[XSI_NIL].strip() if not self.nillable: yield self.validation_error(validation, "element is not nillable.", elem, **kwargs) - try: - if get_xml_bool_attribute(elem, XSI_NIL): - if elem.text is not None: - reason = "xsi:nil='true' but the element is not empty." - yield self.validation_error(validation, reason, elem, **kwargs) - else: - element_data = ElementData(elem.tag, None, None, attributes) - yield converter.element_decode(element_data, self, level) - return - except TypeError: + elif xsi_nil not in {'0', '1', 'false', 'true'}: reason = "xsi:nil attribute must has a boolean value." yield self.validation_error(validation, reason, elem, **kwargs) + elif xsi_nil in ('0', 'false'): + pass + elif elem.text is not None or len(elem): + reason = "xsi:nil='true' but the element is not empty." 
+ yield self.validation_error(validation, reason, elem, **kwargs) + else: + element_data = ElementData(elem.tag, None, None, attributes) + yield converter.element_decode(element_data, self, level) + return if not xsd_type.has_simple_content(): - for result in xsd_type.content_type.iter_decode(elem, validation, level=level + 1, **kwargs): + for assertion in xsd_type.assertions: + for error in assertion(elem, **kwargs): + yield self.validation_error(validation, error, **kwargs) + + for result in xsd_type.content_type.iter_decode( + elem, validation, converter, level + 1, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) else: @@ -502,27 +544,42 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) if self.fixed is not None: if text is None: text = self.fixed - elif text != self.fixed: + elif text == self.fixed or validation == 'skip': + pass + elif not strictly_equal(xsd_type.text_decode(text), xsd_type.text_decode(self.fixed)): reason = "must has the fixed value %r." 
% self.fixed yield self.validation_error(validation, reason, elem, **kwargs) - elif not text and use_defaults and self.default is not None: + + elif not text and kwargs.get('use_defaults') and self.default is not None: text = self.default - if not xsd_type.is_simple(): + if xsd_type.is_complex(): + for assertion in xsd_type.assertions: + for error in assertion(elem, value=text, **kwargs): + yield self.validation_error(validation, error, **kwargs) + + if text and xsd_type.content_type.is_list(): + value = text.split() + else: + value = text + xsd_type = xsd_type.content_type if text is None: - for result in xsd_type.iter_decode('', validation, **kwargs): + for result in xsd_type.iter_decode('', validation, _skip_id=True, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) - if kwargs.get('filler') is not None: - value = kwargs.get('filler')(self) + if 'filler' in kwargs: + value = kwargs['filler'](self) else: + if level == 0 or self.xsd_version != '1.0': + kwargs['_skip_id'] = True + for result in xsd_type.iter_decode(text, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): yield self.validation_error(validation, result, elem, **kwargs) - elif result is None and kwargs.get('filler') is not None: - value = kwargs.get('filler')(self) + elif result is None and 'filler' in kwargs: + value = kwargs['filler'](self) else: value = result @@ -544,26 +601,27 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) del content if validation != 'skip': - for constraint in self.constraints.values(): + for constraint in self.identities.values(): if isinstance(constraint, XsdKeyref) and '_no_deep' in kwargs: # TODO: Complete lazy validation continue - for error in constraint(elem): + for error in constraint(elem, converter): yield self.validation_error(validation, error, elem, **kwargs) - def iter_encode(self, obj, validation='lax', **kwargs): + def 
iter_encode(self, obj, validation='lax', converter=None, level=0, **kwargs): """ Creates an iterator for encoding data to an Element. :param obj: the data that has to be encoded. :param validation: the validation mode: can be 'lax', 'strict' or 'skip'. + :param converter: an :class:`XMLSchemaConverter` subclass or instance to use \ + for the encoding. + :param level: the depth of the element data in the tree structure. :param kwargs: keyword arguments for the encoding process. :return: yields an Element, eventually preceded by a sequence of \ validation or encoding errors. """ - converter = kwargs.get('converter') if not isinstance(converter, XMLSchemaConverter): - converter = self.schema.get_converter(converter, **kwargs) - level = kwargs.pop('level', 0) + converter = self.schema.get_converter(converter, level=level, **kwargs) element_data = converter.element_encode(obj, self, level) errors = [] @@ -572,30 +630,30 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) children = element_data.content attributes = () - if element_data.attributes and XSI_TYPE in element_data.attributes: - xsi_type = element_data.attributes[XSI_TYPE] + xsd_type = self.get_type(element_data) + if XSI_TYPE in element_data.attributes: + type_name = element_data.attributes[XSI_TYPE].strip() try: - xsd_type = self.maps.lookup_type(converter.unmap_qname(xsi_type)) - except KeyError: - errors.append("unknown type %r" % xsi_type) - xsd_type = self.get_type(element_data) - else: - xsd_type = self.get_type(element_data) + xsd_type = self.maps.get_instance_type(type_name, xsd_type, converter) + except (KeyError, TypeError) as err: + errors.append(err) - attribute_group = getattr(xsd_type, 'attributes', self.attributes) + attribute_group = self.get_attributes(xsd_type) for result in attribute_group.iter_encode(element_data.attributes, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): errors.append(result) else: attributes = result - if validation 
!= 'skip' and XSI_NIL in element_data.attributes: + if XSI_NIL in element_data.attributes: + xsi_nil = element_data.attributes[XSI_NIL].strip() if not self.nillable: errors.append("element is not nillable.") - xsi_nil = element_data.attributes[XSI_NIL] - if xsi_nil.strip() not in ('0', '1', 'true', 'false'): + elif xsi_nil not in {'0', '1', 'true', 'false'}: errors.append("xsi:nil attribute must has a boolean value.") - if element_data.text is not None: + elif xsi_nil in ('0', 'false'): + pass + elif element_data.text is not None or element_data.content: errors.append("xsi:nil='true' but the element is not empty.") else: elem = converter.etree_element(element_data.tag, attrib=attributes, level=level) @@ -626,7 +684,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) text = result else: for result in xsd_type.content_type.iter_encode( - element_data, validation, level=level + 1, **kwargs): + element_data, validation, converter, level + 1, **kwargs): if isinstance(result, XMLSchemaValidationError): errors.append(result) elif result: @@ -640,6 +698,29 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) yield elem del element_data + def is_matching(self, name, default_namespace=None, group=None): + if default_namespace and name[0] != '{': + name = '{%s}%s' % (default_namespace, name) + + if name in self.names: + return True + + for xsd_element in self.iter_substitutes(): + if name in xsd_element.names: + return True + return False + + def match(self, name, default_namespace=None, **kwargs): + if default_namespace and name[0] != '{': + name = '{%s}%s' % (default_namespace, name) + + if name in self.names: + return self + + for xsd_element in self.iter_substitutes(): + if name in xsd_element.names: + return xsd_element + def is_restriction(self, other, check_occurs=True): if isinstance(other, XsdAnyElement): if self.min_occurs == self.max_occurs == 0: @@ -648,23 +729,31 @@ class XsdElement(XsdComponent, 
ValidationMixin, ParticleMixin, ElementPathMixin) return False return other.is_matching(self.name, self.default_namespace) elif isinstance(other, XsdElement): - if self.name != other.name: - substitution_group = self.substitution_group + if self.name == other.name: + pass + elif any(n not in other.names for n in self.names): + if other.name == self.substitution_group and \ + other.min_occurs != other.max_occurs and \ + self.max_occurs != 0 and not other.abstract \ + and self.xsd_version == '1.0': + # An UPA violation case. Base is the head element, it's not + # abstract and has non deterministic occurs: this is less + # restrictive than W3C test group (elemZ026), marked as + # invalid despite it's based on an abstract declaration. + # See also test case invalid_restrictions1.xsd. + return False - if other.name == self.substitution_group and other.min_occurs != other.max_occurs \ - and self.max_occurs != 0 and not other.abstract: - # Base is the head element, it's not abstract and has non deterministic occurs: this - # is less restrictive than W3C test group (elemZ026), marked as invalid despite it's - # based on an abstract declaration. 
- return False - elif self.substitution_group is None: - return False - elif not any(e.name == self.name for e in self.maps.substitution_groups[substitution_group]): + for e in other.iter_substitutes(): + if e.name == self.name: + break + else: return False + else: + return False if check_occurs and not self.has_occurs_restriction(other): return False - elif self.type is not other.type and self.type.elem is not other.type.elem and \ + elif not self.is_consistent(other) and self.type.elem is not other.type.elem and \ not self.type.is_derived(other.type, 'restriction') and not other.type.abstract: return False elif self.fixed != other.fixed and self.type.normalize(self.fixed) != other.type.normalize(other.fixed): @@ -673,7 +762,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) return False elif any(value not in self.block for value in other.block.split()): return False - elif not all(k in other.constraints for k in self.constraints): + elif not all(k in other.identities for k in self.identities): return False else: return True @@ -681,7 +770,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) if other.is_empty() and self.max_occurs != 0: return False - check_group_items_occurs = self.schema.XSD_VERSION == '1.0' + check_group_items_occurs = self.xsd_version == '1.0' counter = ParticleCounter() for e in other.iter_model(): if not isinstance(e, (XsdElement, XsdAnyElement)): @@ -706,7 +795,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) return False return True - def overlap(self, other): + def is_overlap(self, other): if isinstance(other, XsdElement): if self.name == other.name: return True @@ -720,62 +809,118 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) return True return False + def is_consistent(self, other): + """ + Element Declarations Consistent check between two element particles. 
+ + Ref: https://www.w3.org/TR/xmlschema-1/#cos-element-consistent + + :returns: `True` if there is no inconsistency between the particles, `False` otherwise, + """ + return self.name != other.name or self.type is other.type + class Xsd11Element(XsdElement): """ - Class for XSD 1.1 'element' declarations. + Class for XSD 1.1 *element* declarations. - - Content: (annotation?, ((simpleType | complexType)?, alternative*, (unique | key | keyref)*)) - + .. + Content: (annotation?, ((simpleType | complexType)?, alternative*, (unique | key | keyref)*)) + """ + _target_namespace = None + def _parse(self): XsdComponent._parse(self) self._parse_attributes() index = self._parse_type() index = self._parse_alternatives(index) self._parse_identity_constraints(index) - if self.parent is None: - self._parse_substitution_group() + + if self.parent is None and 'substitutionGroup' in self.elem.attrib: + for substitution_group in self.elem.attrib['substitutionGroup'].split(): + self._parse_substitution_group(substitution_group) + self._parse_target_namespace() + if any(v.inheritable for v in self.attributes.values()): + self.inheritable = {k: v for k, v in self.attributes.items() if v.inheritable} + def _parse_alternatives(self, index=0): - if self._ref is not None: - self.alternatives = self._ref.alternatives + if self.ref is not None: + self.alternatives = self.ref.alternatives else: - self.alternatives = [] - for child in self._iterparse_components(self.elem, start=index): + alternatives = [] + has_test = True + for child in filter(lambda x: x.tag != XSD_ANNOTATION, self.elem[index:]): if child.tag == XSD_ALTERNATIVE: - self.alternatives.append(XsdAlternative(child, self.schema, self)) + alternatives.append(XsdAlternative(child, self.schema, self)) + if not has_test: + self.parse_error("test attribute missing on non-final alternative") + has_test = 'test' in child.attrib index += 1 else: break + if alternatives: + self.alternatives = alternatives + return index @property - def 
target_namespace(self): - try: - return self.elem.attrib['targetNamespace'] - except KeyError: - return self.schema.target_namespace + def built(self): + return (self.type.parent is None or self.type.built) and \ + all(c.built for c in self.identities.values()) and \ + all(a.built for a in self.alternatives) - def get_type(self, elem): + @property + def target_namespace(self): + if self._target_namespace is None: + return self.schema.target_namespace + return self._target_namespace + + def iter_components(self, xsd_classes=None): + if xsd_classes is None: + yield self + for obj in self.identities.values(): + yield obj + else: + if isinstance(self, xsd_classes): + yield self + for obj in self.identities.values(): + if isinstance(obj, xsd_classes): + yield obj + + for alt in self.alternatives: + for obj in alt.iter_components(xsd_classes): + yield obj + + if self.ref is None and self.type.parent is not None: + for obj in self.type.iter_components(xsd_classes): + yield obj + + def iter_substitutes(self): + for xsd_element in self.maps.substitution_groups.get(self.name, ()): + yield xsd_element + for e in xsd_element.iter_substitutes(): + yield e + + def get_type(self, elem, inherited=None): if not self.alternatives: return self.type @@ -786,74 +931,169 @@ class Xsd11Element(XsdElement): else: elem = etree_element(elem.tag) - for alt in self.alternatives: - if alt.type is not None and boolean_value(list(alt.token.select(context=XPathContext(root=elem)))): - return alt.type + if inherited: + dummy = etree_element('_dummy_element', attrib=inherited) + + for alt in filter(lambda x: x.type is not None, self.alternatives): + if alt.token is None or alt.test(elem) or alt.test(dummy): + return alt.type + else: + for alt in filter(lambda x: x.type is not None, self.alternatives): + if alt.token is None or alt.test(elem): + return alt.type + return self.type - def overlap(self, other): + def is_overlap(self, other): if isinstance(other, XsdElement): if self.name == other.name: 
return True - elif other.substitution_group == self.name or other.name == self.substitution_group: + elif any(self.name == x.name for x in other.iter_substitutes()): return True + + for e in self.iter_substitutes(): + if other.name == e.name or any(x is e for x in other.iter_substitutes()): + return True + + elif isinstance(other, XsdAnyElement): + if other.is_matching(self.name, self.default_namespace): + return True + for e in self.maps.substitution_groups.get(self.name, ()): + if other.is_matching(e.name, self.default_namespace): + return True return False + def is_consistent(self, other, strict=True): + if isinstance(other, XsdAnyElement): + if other.process_contents == 'skip': + return True + xsd_element = other.match(self.name, self.default_namespace, resolve=True) + return xsd_element is None or self.is_consistent(xsd_element, strict=False) + + if self.name == other.name: + e = self + else: + for e in self.iter_substitutes(): + if e.name == other.name: + break + else: + return True + + if len(e.alternatives) != len(other.alternatives): + return False + elif e.type is not other.type and strict: + return False + elif e.type is not other.type or \ + not all(any(a == x for x in other.alternatives) for a in e.alternatives) or \ + not all(any(a == x for x in e.alternatives) for a in other.alternatives): + msg = "Maybe a not equivalent type table between elements %r and %r." % (self, other) + warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) + return True + class XsdAlternative(XsdComponent): """ - - Content: (annotation?, (simpleType | complexType)?) - + XSD 1.1 type *alternative* definitions. + + .. + Content: (annotation?, (simpleType | complexType)?) 
+ """ - _admitted_tags = {XSD_ALTERNATIVE} type = None + path = None + token = None + _ADMITTED_TAGS = {XSD_ALTERNATIVE} + + def __init__(self, elem, schema, parent): + super(XsdAlternative, self).__init__(elem, schema, parent) def __repr__(self): return '%s(type=%r, test=%r)' % (self.__class__.__name__, self.elem.get('type'), self.elem.get('test')) + def __eq__(self, other): + return self.path == other.path and self.type is other.type and \ + self.xpath_default_namespace == other.xpath_default_namespace + + def __ne__(self, other): + return self.path != other.path or self.type is not other.type or \ + self.xpath_default_namespace != other.xpath_default_namespace + def _parse(self): XsdComponent._parse(self) attrib = self.elem.attrib - try: - self.path = attrib['test'] - except KeyError as err: - self.path = 'true()' - self.parse_error(err) if 'xpathDefaultNamespace' in attrib: self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - parser = XPath2Parser(self.namespaces, strict=False, default_namespace=self.xpath_default_namespace) + parser = XPath2Parser( + self.namespaces, strict=False, default_namespace=self.xpath_default_namespace + ) try: - self.token = parser.parse(self.path) - except ElementPathSyntaxError as err: - self.parse_error(err) - self.token = parser.parse('true()') - self.path = 'true()' + self.path = attrib['test'] + except KeyError: + pass # an absent test is not an error, it should be the default type + else: + try: + self.token = parser.parse(self.path) + except ElementPathError as err: + self.parse_error(err) + self.token = parser.parse('false()') + self.path = 'false()' try: type_qname = self.schema.resolve_qname(attrib['type']) - except KeyError: - self.parse_error("missing 'type' attribute") - except ValueError as err: - self.parse_error(err) + except (KeyError, ValueError, RuntimeError) as err: + if 'type' in attrib: + self.parse_error(err) + 
self.type = self.maps.lookup_type(XSD_ANY_TYPE) + else: + child = self._parse_child_component(self.elem, strict=False) + if child is None or child.tag not in (XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE): + self.parse_error("missing 'type' attribute") + self.type = self.maps.lookup_type(XSD_ANY_TYPE) + elif child.tag == XSD_COMPLEX_TYPE: + self.type = self.schema.BUILDERS.complex_type_class(child, self.schema, self) + else: + self.type = self.schema.BUILDERS.simple_type_factory(child, self.schema, self) else: try: self.type = self.maps.lookup_type(type_qname) except KeyError: self.parse_error("unknown type %r" % attrib['type']) else: - if not self.type.is_derived(self.parent.type): - self.parse_error("type %r ir not derived from %r" % (attrib['type'], self.parent.type)) + if self.type.name != XSD_ERROR and not self.type.is_derived(self.parent.type): + msg = "type {!r} is not derived from {!r}" + self.parse_error(msg.format(attrib['type'], self.parent.type)) + + child = self._parse_child_component(self.elem, strict=False) + if child is not None and child.tag in (XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE): + msg = "the attribute 'type' and the <%s> local declaration are mutually exclusive" + self.parse_error(msg % child.tag.split('}')[-1]) @property def built(self): - raise NotImplementedError + return self.type.parent is None or self.type.built + + @property + def validation_attempted(self): + return 'full' if self.built else self.type.validation_attempted + + def iter_components(self, xsd_classes=None): + if xsd_classes is None or isinstance(self, xsd_classes): + yield self + if self.type is not None and self.type.parent is not None: + for obj in self.type.iter_components(xsd_classes): + yield obj + + def test(self, elem): + try: + return self.token.boolean_value(list(self.token.select(context=XPathContext(elem)))) + except (TypeError, ValueError): + return False diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py index cbe5188..3ed988f 100644 --- 
a/xmlschema/validators/exceptions.py +++ b/xmlschema/validators/exceptions.py @@ -13,10 +13,11 @@ This module contains exception and warning classes for the 'xmlschema.validators """ from __future__ import unicode_literals -from ..compat import PY3 +from ..compat import PY3, string_base_type from ..exceptions import XMLSchemaException, XMLSchemaWarning, XMLSchemaValueError -from ..etree import etree_tostring, is_etree_element, etree_getpath -from ..helpers import qname_to_prefixed +from ..qnames import qname_to_prefixed +from ..etree import etree_tostring, etree_getpath +from ..helpers import is_etree_element from ..resources import XMLResource @@ -198,9 +199,14 @@ class XMLSchemaValidationError(XMLSchemaValidatorError, ValueError): :type namespaces: dict """ def __init__(self, validator, obj, reason=None, source=None, namespaces=None): + if not isinstance(obj, string_base_type): + _obj = obj + else: + _obj = obj.encode('ascii', 'xmlcharrefreplace').decode('utf-8') + super(XMLSchemaValidationError, self).__init__( validator=validator, - message="failed validating {!r} with {!r}".format(obj, validator), + message="failed validating {!r} with {!r}".format(_obj, validator), elem=obj if is_etree_element(obj) else None, source=source, namespaces=namespaces, @@ -218,8 +224,12 @@ class XMLSchemaValidationError(XMLSchemaValidatorError, ValueError): msg.append('Reason: %s\n' % self.reason) if hasattr(self.validator, 'tostring'): msg.append("Schema:\n\n%s\n" % self.validator.tostring(' ', 20)) - if self.elem is not None: - elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20) + if is_etree_element(self.elem): + try: + elem_as_string = etree_tostring(self.elem, self.namespaces, ' ', 20) + except (ValueError, TypeError): + elem_as_string = repr(self.elem) + if hasattr(self.elem, 'sourceline'): msg.append("Instance (line %r):\n\n%s\n" % (self.elem.sourceline, elem_as_string)) else: @@ -329,16 +339,16 @@ class 
XMLSchemaChildrenValidationError(XMLSchemaValidationError): expected_tags = [] for xsd_element in expected: if xsd_element.name is not None: - expected_tags.append(repr(xsd_element.prefixed_name)) + expected_tags.append(xsd_element.prefixed_name) elif xsd_element.process_contents == 'strict': expected_tags.append('from %r namespace/s' % xsd_element.namespace) if not expected_tags: - reason += " No child element is expected at this point." - elif len(expected_tags) > 1: - reason += " Tags %s are expected." % expected_tags - else: + pass # reason += " No child element is expected at this point." <-- this can be misleading + elif len(expected_tags) == 1: reason += " Tag %s expected." % expected_tags[0] + else: + reason += " Tag (%s) expected." % ' | '.join(expected_tags) super(XMLSchemaChildrenValidationError, self).__init__(validator, elem, reason, source, namespaces) @@ -349,3 +359,7 @@ class XMLSchemaIncludeWarning(XMLSchemaWarning): class XMLSchemaImportWarning(XMLSchemaWarning): """A schema namespace import fails.""" + + +class XMLSchemaTypeTableWarning(XMLSchemaWarning): + """Not equivalent type table found in model.""" diff --git a/xmlschema/validators/facets.py b/xmlschema/validators/facets.py index 2937926..e018229 100644 --- a/xmlschema/validators/facets.py +++ b/xmlschema/validators/facets.py @@ -13,13 +13,16 @@ This module contains declarations and classes for XML Schema constraint facets. 
""" from __future__ import unicode_literals import re -from elementpath import XPath2Parser, ElementPathError, datatypes +import operator +from elementpath import XPath2Parser, ElementPathError +from elementpath.datatypes import XSD_BUILTIN_TYPES from ..compat import unicode_type, MutableSequence -from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, XSD_WHITE_SPACE, \ - XSD_PATTERN, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, \ - XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_ASSERTION, XSD_EXPLICIT_TIMEZONE, XSD_NOTATION_TYPE, \ - XSD_BASE64_BINARY, XSD_HEX_BINARY +from ..qnames import XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_ENUMERATION, \ + XSD_WHITE_SPACE, XSD_PATTERN, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, XSD_MIN_INCLUSIVE, \ + XSD_MIN_EXCLUSIVE, XSD_TOTAL_DIGITS, XSD_FRACTION_DIGITS, XSD_ASSERTION, \ + XSD_EXPLICIT_TIMEZONE, XSD_NOTATION_TYPE, XSD_BASE64_BINARY, XSD_HEX_BINARY, XSD_QNAME +from ..helpers import count_digits from ..regex import get_python_regex from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError @@ -30,6 +33,8 @@ class XsdFacet(XsdComponent): """ XML Schema constraining facets base class. 
""" + fixed = False + def __init__(self, elem, schema, parent, base_type): self.base_type = base_type super(XsdFacet, self).__init__(elem, schema, parent) @@ -38,39 +43,35 @@ class XsdFacet(XsdComponent): return '%s(value=%r, fixed=%r)' % (self.__class__.__name__, self.value, self.fixed) def __call__(self, value): - for error in self.validator(value): - yield error + try: + for error in self.validator(value): + yield error + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) def _parse(self): super(XsdFacet, self)._parse() - elem = self.elem - self.fixed = elem.get('fixed', False) + if 'fixed' in self.elem.attrib and self.elem.attrib['fixed'] in ('true', '1'): + self.fixed = True base_facet = self.base_facet self.base_value = None if base_facet is None else base_facet.value try: - self._parse_value(elem) + self._parse_value(self.elem) except (KeyError, ValueError, XMLSchemaDecodeError) as err: self.value = None self.parse_error(unicode_type(err)) else: if base_facet is not None and base_facet.fixed and \ base_facet.value is not None and self.value != base_facet.value: - self.parse_error("%r facet value is fixed to %r" % (elem.tag, base_facet.value)) + self.parse_error("%r facet value is fixed to %r" % (self.elem.tag, base_facet.value)) def _parse_value(self, elem): self.value = elem.attrib['value'] @property def built(self): - return self.base_type.is_global or self.base_type.built - - @property - def validation_attempted(self): - if self.built: - return 'full' - else: - return self.base_type.validation_attempted + return True @property def base_facet(self): @@ -95,17 +96,17 @@ class XsdFacet(XsdComponent): class XsdWhiteSpaceFacet(XsdFacet): """ - XSD whiteSpace facet. + XSD *whiteSpace* facet. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = XSD_WHITE_SPACE, + _ADMITTED_TAGS = XSD_WHITE_SPACE, def _parse_value(self, elem): self.value = value = elem.attrib['value'] @@ -131,17 +132,17 @@ class XsdWhiteSpaceFacet(XsdFacet): class XsdLengthFacet(XsdFacet): """ - XSD length facet. + XSD *length* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_LENGTH, + _ADMITTED_TAGS = XSD_LENGTH, def _parse_value(self, elem): self.value = int(elem.attrib['value']) @@ -155,6 +156,8 @@ class XsdLengthFacet(XsdFacet): self.validator = self.hex_length_validator elif primitive_type.name == XSD_BASE64_BINARY: self.validator = self.base64_length_validator + elif primitive_type.name == XSD_QNAME: + pass # See: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4009 else: self.validator = self.length_validator @@ -174,17 +177,17 @@ class XsdLengthFacet(XsdFacet): class XsdMinLengthFacet(XsdFacet): """ - XSD minLength facet. + XSD *minLength* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_MIN_LENGTH, + _ADMITTED_TAGS = XSD_MIN_LENGTH, def _parse_value(self, elem): self.value = int(elem.attrib['value']) @@ -198,7 +201,7 @@ class XsdMinLengthFacet(XsdFacet): self.validator = self.hex_min_length_validator elif primitive_type.name == XSD_BASE64_BINARY: self.validator = self.base64_min_length_validator - else: + elif primitive_type.name != XSD_QNAME: self.validator = self.min_length_validator def min_length_validator(self, x): @@ -217,17 +220,17 @@ class XsdMinLengthFacet(XsdFacet): class XsdMaxLengthFacet(XsdFacet): """ - XSD maxLength facet. + XSD *maxLength* facet. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = XSD_MAX_LENGTH, + _ADMITTED_TAGS = XSD_MAX_LENGTH, def _parse_value(self, elem): self.value = int(elem.attrib['value']) @@ -241,7 +244,7 @@ class XsdMaxLengthFacet(XsdFacet): self.validator = self.hex_max_length_validator elif primitive_type.name == XSD_BASE64_BINARY: self.validator = self.base64_max_length_validator - else: + elif primitive_type.name != XSD_QNAME: self.validator = self.max_length_validator def max_length_validator(self, x): @@ -260,20 +263,23 @@ class XsdMaxLengthFacet(XsdFacet): class XsdMinInclusiveFacet(XsdFacet): """ - XSD minInclusive facet. + XSD *minInclusive* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_MIN_INCLUSIVE, + _ADMITTED_TAGS = XSD_MIN_INCLUSIVE, def _parse_value(self, elem): - self.value = self.base_type.decode(elem.attrib['value']) + try: + self.value = self.base_type.primitive_type.decode(elem.attrib['value']) + except AttributeError: + self.value = self.base_type.decode(elem.attrib['value']) facet = self.base_type.get_facet(XSD_MIN_EXCLUSIVE) if facet is not None and facet.value >= self.value: @@ -288,27 +294,34 @@ class XsdMinInclusiveFacet(XsdFacet): if facet is not None and facet.value < self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x < self.value: - yield XMLSchemaValidationError(self, x, "value has to be greater or equal than %r." % self.value) + def __call__(self, value): + try: + if value < self.value: + reason = "value has to be greater or equal than %r." % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdMinExclusiveFacet(XsdFacet): """ - XSD minExclusive facet. + XSD *minExclusive* facet. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = XSD_MIN_EXCLUSIVE, + _ADMITTED_TAGS = XSD_MIN_EXCLUSIVE, def _parse_value(self, elem): - self.value = self.base_type.decode(elem.attrib['value']) + try: + self.value = self.base_type.primitive_type.decode(elem.attrib['value']) + except AttributeError: + self.value = self.base_type.decode(elem.attrib['value']) facet = self.base_type.get_facet(XSD_MIN_EXCLUSIVE) if facet is not None and facet.value > self.value: @@ -323,27 +336,34 @@ class XsdMinExclusiveFacet(XsdFacet): if facet is not None and facet.value <= self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x <= self.value: - yield XMLSchemaValidationError(self, x, "value has to be greater than %r." % self.value) + def __call__(self, value): + try: + if value <= self.value: + reason = "value has to be greater than %r." % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdMaxInclusiveFacet(XsdFacet): """ - XSD maxInclusive facet. + XSD *maxInclusive* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_MAX_INCLUSIVE, + _ADMITTED_TAGS = XSD_MAX_INCLUSIVE, def _parse_value(self, elem): - self.value = self.base_type.decode(elem.attrib['value']) + try: + self.value = self.base_type.primitive_type.decode(elem.attrib['value']) + except AttributeError: + self.value = self.base_type.decode(elem.attrib['value']) facet = self.base_type.get_facet(XSD_MIN_EXCLUSIVE) if facet is not None and facet.value >= self.value: @@ -358,27 +378,34 @@ class XsdMaxInclusiveFacet(XsdFacet): if facet is not None and facet.value < self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x > self.value: - yield XMLSchemaValidationError(self, x, "value has to be lesser or equal than %r." 
% self.value) + def __call__(self, value): + try: + if value > self.value: + reason = "value has to be lesser or equal than %r." % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdMaxExclusiveFacet(XsdFacet): """ - XSD maxExclusive facet. + XSD *maxExclusive* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_MAX_EXCLUSIVE, + _ADMITTED_TAGS = XSD_MAX_EXCLUSIVE, def _parse_value(self, elem): - self.value = self.base_type.decode(elem.attrib['value']) + try: + self.value = self.base_type.primitive_type.decode(elem.attrib['value']) + except AttributeError: + self.value = self.base_type.decode(elem.attrib['value']) facet = self.base_type.get_facet(XSD_MIN_EXCLUSIVE) if facet is not None and facet.value >= self.value: @@ -393,24 +420,28 @@ class XsdMaxExclusiveFacet(XsdFacet): if facet is not None and facet.value < self.value: self.parse_error("maximum value of base_type is lesser") - def validator(self, x): - if x >= self.value: - yield XMLSchemaValidationError(self, x, "value has to be lesser than %r" % self.value) + def __call__(self, value): + try: + if value >= self.value: + reason = "value has to be lesser than %r" % self.value + yield XMLSchemaValidationError(self, value, reason) + except (TypeError, ValueError) as err: + yield XMLSchemaValidationError(self, value, unicode_type(err)) class XsdTotalDigitsFacet(XsdFacet): """ - XSD totalDigits facet. + XSD *totalDigits* facet. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = XSD_TOTAL_DIGITS, + _ADMITTED_TAGS = XSD_TOTAL_DIGITS, def _parse_value(self, elem): self.value = int(elem.attrib['value']) @@ -419,23 +450,25 @@ class XsdTotalDigitsFacet(XsdFacet): self.validator = self.total_digits_validator def total_digits_validator(self, x): - if len([d for d in str(x).strip('0') if d.isdigit()]) > self.value: - yield XMLSchemaValidationError(self, x, "the number of digits is greater than %r." % self.value) + if operator.add(*count_digits(x)) > self.value: + yield XMLSchemaValidationError( + self, x, "the number of digits is greater than %r." % self.value + ) class XsdFractionDigitsFacet(XsdFacet): """ - XSD fractionDigits facet. + XSD *fractionDigits* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_FRACTION_DIGITS, + _ADMITTED_TAGS = XSD_FRACTION_DIGITS, def __init__(self, elem, schema, parent, base_type): super(XsdFractionDigitsFacet, self).__init__(elem, schema, parent, base_type) @@ -451,23 +484,25 @@ class XsdFractionDigitsFacet(XsdFacet): self.validator = self.fraction_digits_validator def fraction_digits_validator(self, x): - if len(str(x).strip('0').partition('.')[2]) > self.value: - yield XMLSchemaValidationError(self, x, "the number of fraction digits is greater than %r." % self.value) + if count_digits(x)[1] > self.value: + yield XMLSchemaValidationError( + self, x, "the number of fraction digits is greater than %r." % self.value + ) class XsdExplicitTimezoneFacet(XsdFacet): """ - XSD 1.1 explicitTimezone facet. + XSD 1.1 *explicitTimezone* facet. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = XSD_EXPLICIT_TIMEZONE, + _ADMITTED_TAGS = XSD_EXPLICIT_TIMEZONE, def _parse_value(self, elem): self.value = value = elem.attrib['value'] @@ -489,16 +524,16 @@ class XsdExplicitTimezoneFacet(XsdFacet): class XsdEnumerationFacets(MutableSequence, XsdFacet): """ - Sequence of XSD enumeration facets. 
Values are validates if match any of enumeration values. + Sequence of XSD *enumeration* facets. Values are validates if match any of enumeration values. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = {XSD_ENUMERATION} + _ADMITTED_TAGS = {XSD_ENUMERATION} def __init__(self, elem, schema, parent, base_type): XsdFacet.__init__(self, elem, schema, parent, base_type) @@ -519,12 +554,12 @@ class XsdEnumerationFacets(MutableSequence, XsdFacet): if self.base_type.name == XSD_NOTATION_TYPE: try: notation_qname = self.schema.resolve_qname(value) - except ValueError as err: + except (KeyError, ValueError, RuntimeError) as err: self.parse_error(err, elem) else: if notation_qname not in self.maps.notations: - self.parse_error("value {} must match a notation global declaration".format(value), elem) - + msg = "value {!r} must match a notation declaration" + self.parse_error(msg.format(value), elem) return value # Implements the abstract methods of MutableSequence @@ -563,16 +598,16 @@ class XsdEnumerationFacets(MutableSequence, XsdFacet): class XsdPatternFacets(MutableSequence, XsdFacet): """ - Sequence of XSD pattern facets. Values are validates if match any of patterns. + Sequence of XSD *pattern* facets. Values are validates if match any of patterns. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = {XSD_PATTERN} + _ADMITTED_TAGS = {XSD_PATTERN} def __init__(self, elem, schema, parent, base_type): XsdFacet.__init__(self, elem, schema, parent, base_type) @@ -584,7 +619,7 @@ class XsdPatternFacets(MutableSequence, XsdFacet): def _parse_value(self, elem): try: - return re.compile(get_python_regex(elem.attrib['value'])) + return re.compile(get_python_regex(elem.attrib['value'], self.xsd_version)) except KeyError: self.parse_error("missing 'value' attribute", elem) return re.compile(r'^$') @@ -619,28 +654,52 @@ class XsdPatternFacets(MutableSequence, XsdFacet): return '%s(%s...\'])' % (self.__class__.__name__, s[:70]) def __call__(self, text): - if all(pattern.match(text) is None for pattern in self.patterns): - msg = "value doesn't match any pattern of %r." - yield XMLSchemaValidationError(self, text, reason=msg % self.regexps) + try: + if all(pattern.match(text) is None for pattern in self.patterns): + msg = "value doesn't match any pattern of %r." + yield XMLSchemaValidationError(self, text, reason=msg % self.regexps) + except TypeError as err: + yield XMLSchemaValidationError(self, text, unicode_type(err)) @property def regexps(self): return [e.get('value', '') for e in self._elements] +class XsdAssertionXPathParser(XPath2Parser): + """Parser for XSD 1.1 assertion facets.""" + + +XsdAssertionXPathParser.unregister('last') +XsdAssertionXPathParser.unregister('position') + + +@XsdAssertionXPathParser.method(XsdAssertionXPathParser.function('last', nargs=0)) +def evaluate(self, context=None): + self.missing_context("Context item size is undefined") + + +@XsdAssertionXPathParser.method(XsdAssertionXPathParser.function('position', nargs=0)) +def evaluate(self, context=None): + self.missing_context("Context item position is undefined") + + +XsdAssertionXPathParser.build_tokenizer() + + class XsdAssertionFacet(XsdFacet): """ - XSD 1.1 assertion facet for simpleType definitions. + XSD 1.1 *assertion* facet for simpleType definitions. 
- - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = {XSD_ASSERTION} + _ADMITTED_TAGS = {XSD_ASSERTION} def __repr__(self): return '%s(test=%r)' % (self.__class__.__name__, self.path) @@ -653,15 +712,18 @@ class XsdAssertionFacet(XsdFacet): self.parse_error(str(err), elem=self.elem) self.path = 'true()' - builtin_type_name = self.base_type.primitive_type.local_name - variables = {'value': datatypes.XSD_BUILTIN_TYPES[builtin_type_name].value} + try: + builtin_type_name = self.base_type.primitive_type.local_name + variables = {'value': XSD_BUILTIN_TYPES[builtin_type_name].value} + except AttributeError: + variables = {'value': XSD_BUILTIN_TYPES['anySimpleType'].value} if 'xpathDefaultNamespace' in self.elem.attrib: self.xpath_default_namespace = self._parse_xpath_default_namespace(self.elem) else: self.xpath_default_namespace = self.schema.xpath_default_namespace - self.parser = XPath2Parser(self.namespaces, strict=False, variables=variables, - default_namespace=self.xpath_default_namespace) + self.parser = XsdAssertionXPathParser(self.namespaces, strict=False, variables=variables, + default_namespace=self.xpath_default_namespace) try: self.token = self.parser.parse(self.path) @@ -671,9 +733,12 @@ class XsdAssertionFacet(XsdFacet): def __call__(self, value): self.parser.variables['value'] = value - if not self.token.evaluate(): - msg = "value is not true with test path %r." - yield XMLSchemaValidationError(self, value, reason=msg % self.path) + try: + if not self.token.evaluate(): + msg = "value is not true with test path %r." 
+ yield XMLSchemaValidationError(self, value, reason=msg % self.path) + except ElementPathError as err: + yield XMLSchemaValidationError(self, value, reason=str(err)) XSD_10_FACETS_BUILDERS = { diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py index 26ee26d..d610324 100644 --- a/xmlschema/validators/globals_.py +++ b/xmlschema/validators/globals_.py @@ -12,87 +12,72 @@ This module contains functions and classes for namespaces XSD declarations/definitions. """ from __future__ import unicode_literals -import re import warnings from collections import Counter +from ..compat import string_base_type from ..exceptions import XMLSchemaKeyError, XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaWarning -from ..namespaces import XSD_NAMESPACE -from ..qnames import XSD_INCLUDE, XSD_IMPORT, XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, \ - XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ELEMENT -from ..helpers import get_qname, local_name -from ..namespaces import NamespaceResourcesMap +from ..namespaces import XSD_NAMESPACE, NamespaceResourcesMap +from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, \ + XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, \ + XSD_ELEMENT, XSI_TYPE, get_qname, local_name, qname_to_extended -from . import XMLSchemaNotBuiltError, XMLSchemaModelError, XMLSchemaModelDepthError, XsdValidator, \ - XsdKeyref, XsdComponent, XsdAttribute, XsdSimpleType, XsdComplexType, XsdElement, XsdAttributeGroup, \ - XsdGroup, XsdNotation, XsdAssert +from . 
import XMLSchemaNotBuiltError, XMLSchemaModelError, XMLSchemaModelDepthError, \ + XsdValidator, XsdComponent, XsdAttribute, XsdSimpleType, XsdComplexType, XsdElement, \ + XsdAttributeGroup, XsdGroup, XsdNotation, Xsd11Element, XsdKeyref, XsdAssert from .builtins import xsd_builtin_types_factory -def camel_case_split(s): - """ - Split words of a camel case string - """ - return re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)', s) - - -def iterchildren_by_tag(tag): - """ - Defines a generator that produce all child elements that have a specific tag. - """ - def iterfind_function(elem): - for e in elem: - if e.tag == tag: - yield e - iterfind_function.__name__ = str('iterfind_xsd_%ss' % '_'.join(camel_case_split(local_name(tag))).lower()) - return iterfind_function - - -iterchildren_xsd_import = iterchildren_by_tag(XSD_IMPORT) -iterchildren_xsd_include = iterchildren_by_tag(XSD_INCLUDE) -iterchildren_xsd_redefine = iterchildren_by_tag(XSD_REDEFINE) -iterchildren_xsd_override = iterchildren_by_tag(XSD_OVERRIDE) - - # # Defines the load functions for XML Schema structures -def create_load_function(filter_function): +def create_load_function(tag): def load_xsd_globals(xsd_globals, schemas): redefinitions = [] for schema in schemas: target_namespace = schema.target_namespace - for elem in iterchildren_xsd_redefine(schema.root): + + for elem in filter(lambda x: x.tag in (XSD_REDEFINE, XSD_OVERRIDE), schema.root): location = elem.get('schemaLocation') if location is None: continue - for child in filter_function(elem): + for child in filter(lambda x: x.tag == tag and 'name' in x.attrib, elem): qname = get_qname(target_namespace, child.attrib['name']) - redefinitions.append((qname, child, schema, schema.includes[location])) + redefinitions.append((qname, elem, child, schema, schema.includes[location])) - for elem in filter_function(schema.root): + for elem in filter(lambda x: x.tag == tag and 'name' in x.attrib, schema.root): qname = get_qname(target_namespace, 
elem.attrib['name']) - try: - xsd_globals[qname].append((elem, schema)) - except KeyError: + if qname not in xsd_globals: xsd_globals[qname] = (elem, schema) - except AttributeError: - xsd_globals[qname] = [xsd_globals[qname], (elem, schema)] + else: + try: + other_schema = xsd_globals[qname][1] + except (TypeError, IndexError): + pass + else: + # It's ignored or replaced in case of an override + if other_schema.override is schema: + continue + elif schema.override is other_schema: + xsd_globals[qname] = (elem, schema) + continue + + msg = "global {} with name={!r} is already defined" + schema.parse_error(msg.format(local_name(tag), qname)) tags = Counter([x[0] for x in redefinitions]) - for qname, elem, schema, redefined_schema in redefinitions: + for qname, elem, child, schema, redefined_schema in redefinitions: # Checks multiple redefinitions if tags[qname] > 1: tags[qname] = 1 - redefined_schemas = [x[3] for x in redefinitions if x[0] == qname] + redefined_schemas = [x[-1] for x in redefinitions if x[0] == qname] if any(redefined_schemas.count(x) > 1 for x in redefined_schemas): - schema.parse_error( - "multiple redefinition for {} {!r}".format(local_name(elem.tag), qname), elem - ) + msg = "multiple redefinition for {} {!r}" + schema.parse_error(msg.format(local_name(child.tag), qname), child) else: - redefined_schemas = {x[3]: x[2] for x in redefinitions if x[0] == qname} + redefined_schemas = {x[-1]: x[-2] for x in redefinitions if x[0] == qname} for rs, s in redefined_schemas.items(): while True: try: @@ -101,30 +86,31 @@ def create_load_function(filter_function): break if s is rs: - schema.parse_error( - "circular redefinition for {} {!r}".format(local_name(elem.tag), qname), elem - ) + msg = "circular redefinition for {} {!r}" + schema.parse_error(msg.format(local_name(child.tag), qname), child) break - # Append redefinition - try: - xsd_globals[qname].append((elem, schema)) - except KeyError: - schema.parse_error("not a redefinition!", elem) - # 
xsd_globals[qname] = elem, schema - except AttributeError: - xsd_globals[qname] = [xsd_globals[qname], (elem, schema)] + if elem.tag == XSD_OVERRIDE: + xsd_globals[qname] = (child, schema) + else: + # Append to a list if it's a redefine + try: + xsd_globals[qname].append((child, schema)) + except KeyError: + schema.parse_error("not a redefinition!", child) + except AttributeError: + xsd_globals[qname] = [xsd_globals[qname], (child, schema)] return load_xsd_globals -load_xsd_simple_types = create_load_function(iterchildren_by_tag(XSD_SIMPLE_TYPE)) -load_xsd_attributes = create_load_function(iterchildren_by_tag(XSD_ATTRIBUTE)) -load_xsd_attribute_groups = create_load_function(iterchildren_by_tag(XSD_ATTRIBUTE_GROUP)) -load_xsd_complex_types = create_load_function(iterchildren_by_tag(XSD_COMPLEX_TYPE)) -load_xsd_elements = create_load_function(iterchildren_by_tag(XSD_ELEMENT)) -load_xsd_groups = create_load_function(iterchildren_by_tag(XSD_GROUP)) -load_xsd_notations = create_load_function(iterchildren_by_tag(XSD_NOTATION)) +load_xsd_simple_types = create_load_function(XSD_SIMPLE_TYPE) +load_xsd_attributes = create_load_function(XSD_ATTRIBUTE) +load_xsd_attribute_groups = create_load_function(XSD_ATTRIBUTE_GROUP) +load_xsd_complex_types = create_load_function(XSD_COMPLEX_TYPE) +load_xsd_elements = create_load_function(XSD_ELEMENT) +load_xsd_groups = create_load_function(XSD_GROUP) +load_xsd_notations = create_load_function(XSD_NOTATION) def create_lookup_function(xsd_classes): @@ -133,13 +119,13 @@ def create_lookup_function(xsd_classes): else: types_desc = xsd_classes.__name__ - def lookup(global_map, qname, tag_map): + def lookup(qname, global_map, tag_map): try: obj = global_map[qname] except KeyError: if '{' in qname: - raise XMLSchemaKeyError("missing a %s component for %r!" % (types_desc, qname)) - raise XMLSchemaKeyError("missing a %s component for %r! As the name has no namespace " + raise XMLSchemaKeyError("missing an %s component for %r!" 
% (types_desc, qname)) + raise XMLSchemaKeyError("missing an %s component for %r! As the name has no namespace " "maybe a missing default namespace declaration." % (types_desc, qname)) else: if isinstance(obj, xsd_classes): @@ -225,7 +211,7 @@ class XsdGlobals(XsdValidator): self.notations = {} # Notations self.elements = {} # Global elements self.substitution_groups = {} # Substitution groups - self.constraints = {} # Constraints (uniqueness, keys, keyref) + self.identities = {} # Identity constraints (uniqueness, keys, keyref) self.global_maps = (self.notations, self.types, self.attributes, self.attribute_groups, self.groups, self.elements) @@ -244,30 +230,41 @@ class XsdGlobals(XsdValidator): obj.notations.update(self.notations) obj.elements.update(self.elements) obj.substitution_groups.update(self.substitution_groups) - obj.constraints.update(self.constraints) + obj.identities.update(self.identities) return obj __copy__ = copy def lookup_notation(self, qname): - return lookup_notation(self.notations, qname, self.validator.BUILDERS_MAP) + return lookup_notation(qname, self.notations, self.validator.BUILDERS_MAP) def lookup_type(self, qname): - return lookup_type(self.types, qname, self.validator.BUILDERS_MAP) + return lookup_type(qname, self.types, self.validator.BUILDERS_MAP) def lookup_attribute(self, qname): - return lookup_attribute(self.attributes, qname, self.validator.BUILDERS_MAP) + return lookup_attribute(qname, self.attributes, self.validator.BUILDERS_MAP) def lookup_attribute_group(self, qname): - return lookup_attribute_group(self.attribute_groups, qname, self.validator.BUILDERS_MAP) + return lookup_attribute_group(qname, self.attribute_groups, self.validator.BUILDERS_MAP) def lookup_group(self, qname): - return lookup_group(self.groups, qname, self.validator.BUILDERS_MAP) + return lookup_group(qname, self.groups, self.validator.BUILDERS_MAP) def lookup_element(self, qname): - return lookup_element(self.elements, qname, self.validator.BUILDERS_MAP) + 
return lookup_element(qname, self.elements, self.validator.BUILDERS_MAP) def lookup(self, tag, qname): + """ + General lookup method for XSD global components. + + :param tag: the expanded QName of the XSD the global declaration/definition \ + (eg. '{http://www.w3.org/2001/XMLSchema}element'), that is used to select \ + the global map for lookup. + :param qname: the expanded QName of the component to be looked-up. + :returns: an XSD global component. + :raises: an XMLSchemaValueError if the *tag* argument is not appropriate for a global \ + component, an XMLSchemaKeyError if the *qname* argument is not found in the global map. + """ if tag in (XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE): return self.lookup_type(qname) elif tag == XSD_ELEMENT: @@ -283,18 +280,38 @@ class XsdGlobals(XsdValidator): else: raise XMLSchemaValueError("wrong tag {!r} for an XSD global definition/declaration".format(tag)) + def get_instance_type(self, type_name, base_type, namespaces): + """ + Returns the instance XSI type from global maps, validating it with the reference base type. + + :param type_name: the XSI type attribute value, a QName in prefixed format. + :param base_type: the XSD from which the instance type has to be derived. + :param namespaces: a map from prefixes to namespaces. 
+ """ + if base_type.is_complex() and XSI_TYPE in base_type.attributes: + base_type.attributes[XSI_TYPE].validate(type_name) + + extended_name = qname_to_extended(type_name, namespaces) + xsi_type = lookup_type(extended_name, self.types, self.validator.BUILDERS_MAP) + if not xsi_type.is_derived(base_type): + raise XMLSchemaTypeError("%r is not a derived type of %r" % (xsi_type, self)) + return xsi_type + @property def built(self): - for schema in self.iter_schemas(): - if not schema.built: - return False - return True + return all(schema.built for schema in self.iter_schemas()) + + @property + def unbuilt(self): + """Property that returns a list with unbuilt components.""" + return [c for s in self.iter_schemas() for c in s.iter_components() + if c is not s and not c.built] @property def validation_attempted(self): if self.built: return 'full' - elif any([schema.validation_attempted == 'partial' for schema in self.iter_schemas()]): + elif any(schema.validation_attempted == 'partial' for schema in self.iter_schemas()): return 'partial' else: return 'none' @@ -311,8 +328,12 @@ class XsdGlobals(XsdValidator): return 'notKnown' @property - def resources(self): - return [(schema.url, schema) for schemas in self.namespaces.values() for schema in schemas] + def xsd_version(self): + return self.validator.XSD_VERSION + + @property + def builders_map(self): + return self.validator.BUILDERS_MAP @property def all_errors(self): @@ -321,6 +342,13 @@ class XsdGlobals(XsdValidator): errors.extend(schema.all_errors) return errors + @property + def constraints(self): + """ + Old reference to identity constraints, for backward compatibility. Will be removed in v1.1.0. 
+ """ + return self.identities + def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self @@ -353,7 +381,7 @@ class XsdGlobals(XsdValidator): else: if schema in ns_schemas: return - elif not any([schema.url == obj.url and schema.__class__ == obj.__class__ for obj in ns_schemas]): + elif not any(schema.url == obj.url and schema.__class__ == obj.__class__ for obj in ns_schemas): ns_schemas.append(schema) def clear(self, remove_schemas=False, only_unbuilt=False): @@ -375,8 +403,8 @@ class XsdGlobals(XsdValidator): del global_map[k] if k in self.substitution_groups: del self.substitution_groups[k] - if k in self.constraints: - del self.constraints[k] + if k in self.identities: + del self.identities[k] if remove_schemas: namespaces = NamespaceResourcesMap() @@ -390,7 +418,7 @@ class XsdGlobals(XsdValidator): for global_map in self.global_maps: global_map.clear() self.substitution_groups.clear() - self.constraints.clear() + self.identities.clear() if remove_schemas: self.namespaces.clear() @@ -426,18 +454,18 @@ class XsdGlobals(XsdValidator): self.notations.update(meta_schema.maps.notations) self.elements.update(meta_schema.maps.elements) self.substitution_groups.update(meta_schema.maps.substitution_groups) - self.constraints.update(meta_schema.maps.constraints) + self.identities.update(meta_schema.maps.identities) not_built_schemas = [schema for schema in self.iter_schemas() if not schema.built] for schema in not_built_schemas: schema._root_elements = None # Load and build global declarations - load_xsd_notations(self.notations, not_built_schemas) load_xsd_simple_types(self.types, not_built_schemas) + load_xsd_complex_types(self.types, not_built_schemas) + load_xsd_notations(self.notations, not_built_schemas) load_xsd_attributes(self.attributes, not_built_schemas) load_xsd_attribute_groups(self.attribute_groups, not_built_schemas) - load_xsd_complex_types(self.types, not_built_schemas) 
load_xsd_elements(self.elements, not_built_schemas) load_xsd_groups(self.groups, not_built_schemas) @@ -448,8 +476,23 @@ class XsdGlobals(XsdValidator): self.lookup_notation(qname) for qname in self.attributes: self.lookup_attribute(qname) + for qname in self.attribute_groups: self.lookup_attribute_group(qname) + for schema in filter( + lambda x: isinstance(x.default_attributes, string_base_type), + not_built_schemas): + try: + schema.default_attributes = schema.maps.attribute_groups[schema.default_attributes] + except KeyError: + schema.default_attributes = None + msg = "defaultAttributes={!r} doesn't match an attribute group of {!r}" + schema.parse_error( + error=msg.format(schema.root.get('defaultAttributes'), schema), + elem=schema.root, + validation=schema.validation + ) + for qname in self.types: self.lookup_type(qname) for qname in self.elements: @@ -462,58 +505,92 @@ class XsdGlobals(XsdValidator): for group in schema.iter_components(XsdGroup): group.build() - for schema in filter(lambda x: x.meta_schema is not None, not_built_schemas): - # Build key references and assertions (XSD meta-schema doesn't have any of them) - for constraint in schema.iter_components(XsdKeyref): - constraint.parse_refer() - for assertion in schema.iter_components(XsdAssert): - assertion.parse() - self._check_schema(schema) + # Builds xs:keyref's key references + for constraint in filter(lambda x: isinstance(x, XsdKeyref), self.identities.values()): + constraint.parse_refer() - if self.validation == 'strict' and not self.built: - raise XMLSchemaNotBuiltError(self, "global map %r not built!" 
% self) + # Build XSD 1.1 identity references and assertions + if self.xsd_version != '1.0': + for schema in filter(lambda x: x.meta_schema is not None, not_built_schemas): + for e in schema.iter_components(Xsd11Element): + for constraint in filter(lambda x: x.ref is not None, e.identities.values()): + try: + ref = self.identities[constraint.name] + except KeyError: + schema.parse_error("Unknown %r constraint %r" % (type(constraint), constraint.name)) + else: + constraint.selector = ref.selector + constraint.fields = ref.fields + if not isinstance(ref, constraint.__class__): + constraint.parse_error("attribute 'ref' points to a different kind constraint") + elif isinstance(constraint, XsdKeyref): + constraint.refer = ref.refer + constraint.ref = ref + + for assertion in schema.iter_components(XsdAssert): + assertion.parse_xpath_test() + + self.check(filter(lambda x: x.meta_schema is not None, not_built_schemas), self.validation) + + def check(self, schemas=None, validation='strict'): + """ + Checks the global maps. For default checks all schemas and raises an exception at first error. + + :param schemas: optional argument with the set of the schemas to check. + :param validation: overrides the default validation mode of the validator. 
+ :raise: XMLSchemaParseError + """ + schemas = set(schemas if schemas is not None else self.iter_schemas()) - def _check_schema(self, schema): # Checks substitution groups circularities for qname in self.substitution_groups: xsd_element = self.elements[qname] for e in xsd_element.iter_substitutes(): if e is xsd_element: - schema.parse_error("circularity found for substitution group with head element %r" % xsd_element) + msg = "circularity found for substitution group with head element %r" + e.parse_error(msg.format(e), validation=validation) + elif e.abstract and e.name not in self.substitution_groups and self.xsd_version > '1.0': + self.parse_error("in XSD 1.1 an abstract element cannot be member of a substitution group") - if schema.XSD_VERSION > '1.0' and schema.default_attributes is not None: - if not isinstance(schema.default_attributes, XsdAttributeGroup): - schema.default_attributes = None - schema.parse_error("defaultAttributes={!r} doesn't match an attribute group of {!r}" - .format(schema.root.get('defaultAttributes'), schema), schema.root) + if validation == 'strict' and not self.built: + raise XMLSchemaNotBuiltError(self, "global map has unbuilt components: %r" % self.unbuilt) - if schema.validation == 'skip': - return - - # Check redefined global groups - for group in filter(lambda x: x.schema is schema and x.redefine is not None, self.groups.values()): + # Check redefined global groups restrictions + for group in filter(lambda x: x.schema in schemas and x.redefine is not None, self.groups.values()): if not any(isinstance(e, XsdGroup) and e.name == group.name for e in group) \ and not group.is_restriction(group.redefine): - group.parse_error("The redefined group is an illegal restriction of the original group.") + msg = "The redefined group is an illegal restriction of the original group." 
+ group.parse_error(msg, validation=validation) - # Check complex content types models - for xsd_type in schema.iter_components(XsdComplexType): - if not isinstance(xsd_type.content_type, XsdGroup): - continue + # Check complex content types models restrictions + for xsd_global in filter(lambda x: x.schema in schemas, self.iter_globals()): + for xsd_type in xsd_global.iter_components(XsdComplexType): + if not isinstance(xsd_type.content_type, XsdGroup): + continue - base_type = xsd_type.base_type - if xsd_type.derivation == 'restriction': - if base_type and base_type.name != XSD_ANY_TYPE and base_type.is_complex(): - if not xsd_type.content_type.is_restriction(base_type.content_type): - xsd_type.parse_error("The derived group is an illegal restriction of the base type group.") + if xsd_type.derivation == 'restriction': + base_type = xsd_type.base_type + if base_type and base_type.name != XSD_ANY_TYPE and base_type.is_complex(): + if not xsd_type.content_type.is_restriction(base_type.content_type): + msg = "The derived group is an illegal restriction of the base type group." 
+ xsd_type.parse_error(msg, validation=validation) - try: - xsd_type.content_type.check_model() - except XMLSchemaModelDepthError: - msg = "cannot verify the content model of %r due to maximum recursion depth exceeded" % xsd_type - schema.warnings.append(msg) - warnings.warn(msg, XMLSchemaWarning, stacklevel=4) - except XMLSchemaModelError as err: - if self.validation == 'strict': - raise - xsd_type.errors.append(err) + if base_type.is_complex() and not base_type.open_content and \ + xsd_type.open_content and xsd_type.open_content.mode != 'none': + group = xsd_type.schema.create_any_content_group( + parent=xsd_type, + any_element=xsd_type.open_content.any_element + ) + if not group.is_restriction(base_type.content_type): + self.parse_error("restriction has an open content but base type has not") + + try: + xsd_type.content_type.check_model() + except XMLSchemaModelDepthError: + msg = "cannot verify the content model of {!r} due to maximum recursion depth exceeded" + xsd_type.schema.warnings.append(msg.format(xsd_type)) + warnings.warn(msg, XMLSchemaWarning, stacklevel=4) + except XMLSchemaModelError as err: + if validation == 'strict': + raise + xsd_type.errors.append(err) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 9137ed6..e5345b1 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -12,20 +12,20 @@ This module contains classes for XML Schema model groups. 
""" from __future__ import unicode_literals +import warnings from ..compat import unicode_type from ..exceptions import XMLSchemaValueError from ..etree import etree_element -from ..qnames import XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_COMPLEX_TYPE, \ - XSD_ELEMENT, XSD_ANY, XSD_RESTRICTION, XSD_EXTENSION -from xmlschema.helpers import get_qname, local_name -from ..converters import XMLSchemaConverter +from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \ + XSD_CHOICE, XSD_ELEMENT, XSD_ANY, XSI_TYPE, get_qname, local_name -from .exceptions import XMLSchemaValidationError, XMLSchemaChildrenValidationError +from .exceptions import XMLSchemaValidationError, XMLSchemaChildrenValidationError, \ + XMLSchemaTypeTableWarning from .xsdbase import ValidationMixin, XsdComponent, XsdType from .elements import XsdElement -from .wildcards import XsdAnyElement -from .models import MAX_MODEL_DEPTH, ParticleMixin, ModelGroup, ModelVisitor +from .wildcards import XsdAnyElement, Xsd11AnyElement +from .models import ParticleMixin, ModelGroup, ModelVisitor ANY_ELEMENT = etree_element( XSD_ANY, @@ -39,54 +39,56 @@ ANY_ELEMENT = etree_element( class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): """ - A class for XSD 1.0 model group definitions. + Class for XSD 1.0 *model group* definitions. - - Content: (annotation?, (all | choice | sequence)?) - + .. + Content: (annotation?, (all | choice | sequence)?) + - - Content: (annotation?, element*) - + .. + Content: (annotation?, element*) + - - Content: (annotation?, (element | group | choice | sequence | any)*) - + .. + Content: (annotation?, (element | group | choice | sequence | any)*) + - - Content: (annotation?, (element | group | choice | sequence | any)*) - + .. 
+ Content: (annotation?, (element | group | choice | sequence | any)*) + """ mixed = False model = None redefine = None - _admitted_tags = { - XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE - } + restriction = None + interleave = None # an Xsd11AnyElement in case of XSD 1.1 openContent with mode='interleave' + suffix = None # an Xsd11AnyElement in case of openContent with mode='suffix' or 'interleave' - def __init__(self, elem, schema, parent, name=None): + _ADMITTED_TAGS = {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE} + + def __init__(self, elem, schema, parent): self._group = [] if parent is not None and parent.mixed: self.mixed = parent.mixed - super(XsdGroup, self).__init__(elem, schema, parent, name) + super(XsdGroup, self).__init__(elem, schema, parent) def __repr__(self): if self.name is None: @@ -112,60 +114,53 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): def _parse(self): super(XsdGroup, self)._parse() self.clear() - elem = self.elem - self._parse_particle(elem) + self._parse_particle(self.elem) - if elem.tag == XSD_GROUP: - # Global group (group) - name = elem.get('name') - ref = elem.get('ref') - if name is None: - if ref is not None: - # Reference to a global group - if self.parent is None: - self.parse_error("a group reference cannot be global") + if self.elem.tag != XSD_GROUP: + # Local group (sequence|all|choice) + if 'name' in self.elem.attrib: + self.parse_error("attribute 'name' not allowed for a local group") + self._parse_content_model(self.elem) - try: - self.name = self.schema.resolve_qname(ref) - except ValueError as err: - self.parse_error(err, elem) - return + elif self._parse_reference(): + try: + xsd_group = self.schema.maps.lookup_group(self.name) + except KeyError: + self.parse_error("missing group %r" % self.prefixed_name) + xsd_group = self.schema.create_any_content_group(self, self.name) - try: - xsd_group = self.schema.maps.lookup_group(self.name) - except KeyError: - 
self.parse_error("missing group %r" % self.prefixed_name) - xsd_group = self.schema.create_any_content_group(self, self.name) + if isinstance(xsd_group, tuple): + # Disallowed circular definition, substitute with any content group. + self.parse_error("Circular definitions detected for group %r:" % self.name, xsd_group[0]) + self.model = 'sequence' + self.mixed = True + self.append(self.schema.BUILDERS.any_element_class(ANY_ELEMENT, self.schema, self)) + else: + self.model = xsd_group.model + if self.model == 'all': + if self.max_occurs != 1: + self.parse_error("maxOccurs must be 1 for 'all' model groups") + if self.min_occurs not in (0, 1): + self.parse_error("minOccurs must be (0 | 1) for 'all' model groups") + if self.xsd_version == '1.0' and isinstance(self.parent, XsdGroup): + self.parse_error("in XSD 1.0 the 'all' model group cannot be nested") + self.append(xsd_group) + self.ref = xsd_group - if isinstance(xsd_group, tuple): - # Disallowed circular definition, substitute with any content group. 
- self.parse_error("Circular definitions detected for group %r:" % self.ref, xsd_group[0]) - self.model = 'sequence' - self.mixed = True - self.append(XsdAnyElement(ANY_ELEMENT, self.schema, self)) - else: - self.model = xsd_group.model - if self.model == 'all': - if self.max_occurs != 1: - self.parse_error("maxOccurs must be 1 for 'all' model groups") - if self.min_occurs not in (0, 1): - self.parse_error("minOccurs must be (0 | 1) for 'all' model groups") - if self.schema.XSD_VERSION == '1.0' and isinstance(self.parent, XsdGroup): - self.parse_error("in XSD 1.0 the 'all' model group cannot be nested") - self.append(xsd_group) - else: - self.parse_error("missing both attributes 'name' and 'ref'") - return - elif ref is None: - # Global group - self.name = get_qname(self.target_namespace, name) - content_model = self._parse_component(elem) + else: + attrib = self.elem.attrib + try: + self.name = get_qname(self.target_namespace, attrib['name']) + except KeyError: + pass + else: + content_model = self._parse_child_component(self.elem, strict=True) if self.parent is not None: self.parse_error("attribute 'name' not allowed for a local group") else: - if 'minOccurs' in elem.attrib: + if 'minOccurs' in attrib: self.parse_error("attribute 'minOccurs' not allowed for a global group") - if 'maxOccurs' in elem.attrib: + if 'maxOccurs' in attrib: self.parse_error("attribute 'maxOccurs' not allowed for a global group") if 'minOccurs' in content_model.attrib: self.parse_error( @@ -175,29 +170,13 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): self.parse_error( "attribute 'maxOccurs' not allowed for the model of a global group", content_model ) - if content_model.tag not in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: - self.parse_error('unexpected tag %r' % content_model.tag, content_model) - return - else: - self.parse_error("found both attributes 'name' and 'ref'") - return - elif elem.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: - # Local group (sequence|all|choice) - 
if 'name' in elem.attrib: - self.parse_error("attribute 'name' not allowed for a local group") - content_model = elem - self.name = None - elif elem.tag in {XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_RESTRICTION}: - self.name = self.model = None - return - else: - self.parse_error('unexpected tag %r' % elem.tag, elem) - return + if content_model.tag in {XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}: + self._parse_content_model(content_model) + else: + self.parse_error('unexpected tag %r' % content_model.tag, content_model) - self._parse_content_model(elem, content_model) - - def _parse_content_model(self, elem, content_model): + def _parse_content_model(self, content_model): self.model = local_name(content_model.tag) if self.model == 'all': if self.max_occurs != 1: @@ -205,12 +184,12 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if self.min_occurs not in (0, 1): self.parse_error("minOccurs must be (0 | 1) for 'all' model groups") - for child in self._iterparse_components(content_model): + for child in filter(lambda x: x.tag != XSD_ANNOTATION, content_model): if child.tag == XSD_ELEMENT: # Builds inner elements and reference groups later, for avoids circularity. 
self.append((child, self.schema)) elif content_model.tag == XSD_ALL: - self.parse_error("'all' model can contains only elements.", elem) + self.parse_error("'all' model can contains only elements.") elif child.tag == XSD_ANY: self.append(XsdAnyElement(child, self.schema, self)) elif child.tag in (XSD_SEQUENCE, XSD_CHOICE): @@ -218,8 +197,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): elif child.tag == XSD_GROUP: try: ref = self.schema.resolve_qname(child.attrib['ref']) - except KeyError: - self.parse_error("missing attribute 'ref' in local group", child) + except (KeyError, ValueError, RuntimeError) as err: + if 'ref' not in child.attrib: + self.parse_error("missing attribute 'ref' in local group", child) + else: + self.parse_error(err, child) continue if ref != self.name: @@ -229,11 +211,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: self.append(xsd_group) elif self.redefine is None: - self.parse_error("Circular definition detected for group %r:" % self.ref, elem) + self.parse_error("Circular definition detected for group %r:" % self.name) else: if child.get('minOccurs', '1') != '1' or child.get('maxOccurs', '1') != '1': self.parse_error( - "Redefined group reference cannot have minOccurs/maxOccurs other than 1:", elem + "Redefined group reference cannot have minOccurs/maxOccurs other than 1:" ) self.append(self.redefine) else: @@ -281,32 +263,29 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if not isinstance(item, ParticleMixin): return False elif isinstance(item, XsdAnyElement): - if not item.built: - return False + continue elif item.parent is None: continue - elif item.parent is not self.parent and isinstance(item.parent, XsdType) and item.parent.parent is None: + elif item.parent is not self.parent and \ + isinstance(item.parent, XsdType) and item.parent.parent is None: continue elif not item.ref and not item.built: return False - return True - @property - def schema_elem(self): - return self.elem if 
self.name else self.parent.elem + return True if self.model else False @property def validation_attempted(self): if self.built: return 'full' - elif any([item.validation_attempted == 'partial' for item in self]): + elif any(item.validation_attempted == 'partial' for item in self): return 'partial' else: return 'none' @property - def ref(self): - return self.elem.get('ref') + def schema_elem(self): + return self.elem if self.name else self.parent.elem def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): @@ -323,15 +302,15 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): for obj in self.redefine.iter_components(xsd_classes): yield obj - def admitted_restriction(self, model): + def admits_restriction(self, model): if self.model == model: return True - elif self.model == 'all' and model == 'choice' and len(self) > 1: - return False - elif model == 'all' and self.model == 'choice' and len(self) > 1: - return False - if model == 'sequence' and self.model != 'sequence' and len(self) > 1: - return False + elif self.model == 'all': + return model == 'sequence' + elif self.model == 'choice': + return model == 'sequence' or len(self.ref or self) <= 1 + else: + return model == 'choice' or len(self.ref or self) <= 1 def is_empty(self): return not self.mixed and not self @@ -339,24 +318,21 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): def is_restriction(self, other, check_occurs=True): if not self: return True - elif self.ref is not None: - return self[0].is_restriction(other, check_occurs) elif not isinstance(other, ParticleMixin): raise XMLSchemaValueError("the argument 'base' must be a %r instance" % ParticleMixin) elif not isinstance(other, XsdGroup): return self.is_element_restriction(other) elif not other: return False - elif other.ref: - return self.is_restriction(other[0], check_occurs) elif len(other) == other.min_occurs == other.max_occurs == 1: if len(self) > 1: return 
self.is_restriction(other[0], check_occurs) - elif isinstance(self[0], XsdGroup) and self[0].is_pointless(parent=self): + elif self.ref is None and isinstance(self[0], XsdGroup) and self[0].is_pointless(parent=self): return self[0].is_restriction(other[0], check_occurs) # Compare model with model - if self.model != other.model and self.model != 'sequence' and len(self) > 1: + if self.model != other.model and self.model != 'sequence' and \ + (len(self) > 1 or self.ref is not None and len(self[0]) > 1): return False elif self.model == other.model or other.model == 'sequence': return self.is_sequence_restriction(other) @@ -366,7 +342,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): return self.is_choice_restriction(other) def is_element_restriction(self, other): - if self.schema.XSD_VERSION == '1.0' and isinstance(other, XsdElement) and \ + if self.xsd_version == '1.0' and isinstance(other, XsdElement) and \ not other.ref and other.name not in self.schema.substitution_groups: return False elif not self.has_occurs_restriction(other): @@ -403,8 +379,9 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): def is_sequence_restriction(self, other): if not self.has_occurs_restriction(other): return False + check_occurs = other.max_occurs != 0 - check_emptiable = other.model != 'choice' # or self.schema.XSD_VERSION == '1.0' + check_emptiable = other.model != 'choice' # Same model: declarations must simply preserve order other_iterator = iter(other.iter_model()) @@ -436,7 +413,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): return False check_occurs = other.max_occurs != 0 - restriction_items = list(self) + restriction_items = list(self) if self.ref is None else list(self[0]) for other_item in other.iter_model(): for item in restriction_items: @@ -451,17 +428,21 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): return not bool(restriction_items) def is_choice_restriction(self, other): - if self.parent is None and other.parent 
is not None and self.schema.XSD_VERSION == '1.0': - return False + if self.ref is None: + if self.parent is None and other.parent is not None: + return False # not allowed restriction in XSD 1.0 + restriction_items = list(self) + elif other.parent is None: + restriction_items = list(self[0]) + else: + return False # not allowed restriction in XSD 1.0 check_occurs = other.max_occurs != 0 - restriction_items = list(self) max_occurs = 0 other_max_occurs = 0 for other_item in other.iter_model(): for item in restriction_items: - if other_item is item or item.is_restriction(other_item, check_occurs): if max_occurs is not None: if item.max_occurs is None: @@ -482,11 +463,11 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if restriction_items: return False elif other_max_occurs is None: - if other.max_occurs: + if other.max_occurs != 0: return True other_max_occurs = 0 elif other.max_occurs is None: - if other_max_occurs: + if other_max_occurs != 0: return True other_max_occurs = 0 else: @@ -499,40 +480,93 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: return other_max_occurs >= max_occurs * self.max_occurs - def iter_elements(self, depth=0): - if depth <= MAX_MODEL_DEPTH: - for item in self: - if isinstance(item, XsdGroup): - for e in item.iter_elements(depth + 1): - yield e + def check_dynamic_context(self, elem, xsd_element, model_element, converter): + if model_element is not xsd_element: + if 'substitution' in model_element.block \ + or xsd_element.type.is_blocked(model_element): + raise XMLSchemaValidationError( + model_element, "substitution of %r is blocked" % model_element + ) + + alternatives = () + if isinstance(xsd_element, XsdAnyElement): + if xsd_element.process_contents == 'skip': + return + + try: + xsd_element = self.maps.lookup_element(elem.tag) + except LookupError: + try: + type_name = elem.attrib[XSI_TYPE].strip() + except KeyError: + return else: - yield item - for e in self.maps.substitution_groups.get(item.name, 
()): - yield e + xsd_type = self.maps.get_instance_type(type_name, self.any_type, converter) + else: + alternatives = xsd_element.alternatives + try: + type_name = elem.attrib[XSI_TYPE].strip() + except KeyError: + xsd_type = xsd_element.type + else: + xsd_type = self.maps.get_instance_type(type_name, xsd_element.type, converter) - def sort_children(self, elements, default_namespace=None): - """ - Sort elements by group order, that maybe partial in case of 'all' or 'choice' ordering. - The not matching elements are appended at the end. - """ - def sorter(elem): - for e in elements_order: - if e.is_matching(elem.tag, default_namespace): - return elements_order[e] - return len(elements_order) + else: + if XSI_TYPE not in elem.attrib: + xsd_type = xsd_element.type + else: + alternatives = xsd_element.alternatives + try: + type_name = elem.attrib[XSI_TYPE].strip() + except KeyError: + xsd_type = xsd_element.type + else: + xsd_type = self.maps.get_instance_type(type_name, xsd_element.type, converter) - elements_order = {e: p for p, e in enumerate(self.iter_elements())} - return sorted(elements, key=sorter) + if model_element is not xsd_element and model_element.block: + for derivation in model_element.block.split(): + if xsd_type.is_derived(model_element.type, derivation): + reason = "usage of %r with type %s is blocked by head element" + raise XMLSchemaValidationError(self, reason % (xsd_element, derivation)) - def iter_decode(self, elem, validation='lax', **kwargs): + if XSI_TYPE not in elem.attrib: + return + + # If it's a restriction the context is the base_type's group + group = self.restriction if self.restriction is not None else self + + # Dynamic EDC check of matched element + for e in filter(lambda x: isinstance(x, XsdElement), group.iter_elements()): + if e.name == elem.tag: + other = e + else: + for other in e.iter_substitutes(): + if other.name == elem.tag: + break + else: + continue + + if len(other.alternatives) != len(alternatives) or \ + not 
xsd_type.is_dynamic_consistent(other.type): + reason = "%r that matches %r is not consistent with local declaration %r" + raise XMLSchemaValidationError(self, reason % (elem, xsd_element, other)) + elif not all(any(a == x for x in alternatives) for a in other.alternatives) or \ + not all(any(a == x for x in other.alternatives) for a in alternatives): + msg = "Maybe a not equivalent type table between elements %r and %r." % (self, xsd_element) + warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) + + def iter_decode(self, elem, validation='lax', converter=None, level=0, **kwargs): """ Creates an iterator for decoding an Element content. :param elem: the Element that has to be decoded. :param validation: the validation mode, can be 'lax', 'strict' or 'skip. + :param converter: an :class:`XMLSchemaConverter` subclass or instance \ + to use for the decoding. + :param level: the depth of the element in the tree structure. :param kwargs: keyword arguments for the decoding process. - :return: yields a list of 3-tuples (key, decoded data, decoder), eventually \ - preceded by a sequence of validation or decoding errors. + :return: yields a list of 3-tuples (key, decoded data, decoder), \ + eventually preceded by a sequence of validation or decoding errors. """ def not_whitespace(s): return s is not None and s.strip() @@ -542,9 +576,9 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): if validation != 'skip' and not self.mixed: # Check element CDATA - if not_whitespace(elem.text) or any([not_whitespace(child.tail) for child in elem]): + if not_whitespace(elem.text) or any(not_whitespace(child.tail) for child in elem): if len(self) == 1 and isinstance(self[0], XsdAnyElement): - pass # [XsdAnyElement()] is equivalent to an empty complexType declaration + pass # [XsdAnyElement()] equals to an empty complexType declaration else: reason = "character data between child elements not allowed!" 
yield self.validation_error(validation, reason, elem, **kwargs) @@ -560,73 +594,78 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): errors = [] try: - default_namespace = kwargs['converter'].get('') - except (KeyError, AttributeError): - kwargs['converter'] = self.schema.get_converter(**kwargs) - default_namespace = kwargs['converter'].get('') + default_namespace = converter.get('') + except (AttributeError, TypeError): + converter = self.schema.get_converter(converter, level=level, **kwargs) + default_namespace = converter.get('') + model_broken = False for index, child in enumerate(elem): if callable(child.tag): continue # child is a - if not default_namespace or child.tag[0] == '{': - tag = child.tag - else: - tag = '{%s}%s' % (default_namespace, child.tag) - while model.element is not None: - if tag in model.element.names or model.element.name is None \ - and model.element.is_matching(tag, default_namespace): - xsd_element = model.element - else: - for xsd_element in model.element.iter_substitutes(): - if tag in xsd_element.names: - break + xsd_element = model.element.match( + child.tag, default_namespace, group=self, occurs=model.occurs + ) + if xsd_element is None: + if self.interleave is not None and \ + self.interleave.is_matching(child.tag, default_namespace, self, model.occurs): + xsd_element = self.interleave + break + + for particle, occurs, expected in model.advance(False): + errors.append((index, particle, occurs, expected)) + model.clear() + model_broken = True # the model is broken, continues with raw decoding. + break else: - for particle, occurs, expected in model.advance(False): - errors.append((index, particle, occurs, expected)) - model.clear() - model.broken = True # the model is broken, continues with raw decoding. 
- break continue + break + + try: + self.check_dynamic_context(child, xsd_element, model.element, converter) + except XMLSchemaValidationError as err: + yield self.validation_error(validation, err, elem, **kwargs) for particle, occurs, expected in model.advance(True): errors.append((index, particle, occurs, expected)) break else: - for xsd_element in self.iter_elements(): - if tag in xsd_element.names or xsd_element.name is None \ - and xsd_element.is_matching(child.tag, default_namespace): - if not model.broken: - model.broken = True - errors.append((index, xsd_element, 0, [])) - break + if self.suffix is not None and self.suffix.is_matching(child.tag, default_namespace, self): + xsd_element = self.suffix else: - errors.append((index, self, 0, None)) - xsd_element = None - if not model.broken: - model.broken = True + for xsd_element in self.iter_elements(): + if xsd_element.is_matching(child.tag, default_namespace, group=self): + if not model_broken: + errors.append((index, xsd_element, 0, [])) + model_broken = True + break + else: + errors.append((index, self, 0, None)) + xsd_element = None + model_broken = True - if xsd_element is None: + if xsd_element is None or kwargs.get('no_depth'): # TODO: use a default decoder str-->str?? 
continue - if '_no_deep' not in kwargs: # TODO: Complete lazy validation - for result in xsd_element.iter_decode(child, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - result_list.append((child.tag, result, xsd_element)) + for result in xsd_element.iter_decode( + child, validation, converter=converter, level=level, **kwargs): + if isinstance(result, XMLSchemaValidationError): + yield result + else: + result_list.append((child.tag, result, xsd_element)) - if cdata_index and child.tail is not None: - tail = unicode_type(child.tail.strip()) - if tail: - if result_list and isinstance(result_list[-1][0], int): - tail = result_list[-1][1] + ' ' + tail - result_list[-1] = result_list[-1][0], tail, None - else: - result_list.append((cdata_index, tail, None)) - cdata_index += 1 + if cdata_index and child.tail is not None: + tail = unicode_type(child.tail.strip()) + if tail: + if result_list and isinstance(result_list[-1][0], int): + tail = result_list[-1][1] + ' ' + tail + result_list[-1] = result_list[-1][0], tail, None + else: + result_list.append((cdata_index, tail, None)) + cdata_index += 1 if model.element is not None: index = len(elem) @@ -639,69 +678,18 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): yield result_list - def sort_content(self, content): - """ - Takes a dictionary and returns a list of element name and content tuples. - - Ordering is inferred from ModelVisitor with any elements that don't - fit the schema placed at the end of the returned list. The calling - function is responsible for raising or collecting errors from those - unplaced elements. - - :param content: a dictionary of element name to list of element contents. - The values of this dictionary must be lists where each item of the - list is the content of a single element. - :return: yields of a list of the Element being encoded's children. 
- """ - consumable_content = {key: iter(val) for key, val in content.items()} - - ordered_content = [] - model = ModelVisitor(self) - while model.element is not None: - elem_name = None - if model.element.name in consumable_content: - elem_name = model.element.name - else: - for elem in model.element.iter_substitutes(): - if elem.name in consumable_content: - elem_name = elem.name - break - - match = False - if elem_name is not None: - match = True - try: - ordered_content.append( - (elem_name, next(consumable_content[elem_name])) - ) - except StopIteration: - match = False - del consumable_content[elem_name] - - if not consumable_content: - break - # Consume the return of advance otherwise we get stuck in an - # infinite loop. Checking validity is the responsibility of - # `iter_encode`. - list(model.advance(match)) - - # Add the remaining content onto the end of the data. It's up to - # the `iter_encode` function to decide whether their presence is an - # error (validation="lax", etc.). - for elem_name, values in consumable_content.items(): - for value in values: - ordered_content.append((elem_name, value)) - - return ordered_content - - def iter_encode(self, element_data, validation='lax', **kwargs): + def iter_encode(self, element_data, validation='lax', converter=None, level=0, indent=4, **kwargs): """ Creates an iterator for encoding data to a list containing Element data. :param element_data: an ElementData instance with unencoded data. :param validation: the validation mode: can be 'lax', 'strict' or 'skip'. - :param kwargs: Keyword arguments for the encoding process. - :return: Yields a couple with the text of the Element and a list of 3-tuples \ + :param converter: an :class:`XMLSchemaConverter` subclass or instance to use \ + for the encoding. + :param level: the depth of the element data in the tree structure. + :param indent: number of spaces for XML indentation (default is 4). + :param kwargs: keyword arguments for the encoding process. 
+ :return: yields a couple with the text of the Element and a list of 3-tuples \ (key, decoded data, decoder), eventually preceded by a sequence of validation \ or encoding errors. """ @@ -709,26 +697,27 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): yield element_data.content return - converter = kwargs.get('converter') - if not isinstance(converter, XMLSchemaConverter): - converter = kwargs['converter'] = self.schema.get_converter(converter, **kwargs) - errors = [] text = None children = [] - level = kwargs.get('level', 0) - indent = kwargs.get('indent', 4) padding = '\n' + ' ' * indent * level - default_namespace = converter.get('') - losslessly = converter.losslessly + + try: + default_namespace = converter.get('') + except (AttributeError, TypeError): + converter = self.schema.get_converter(converter, level=level, **kwargs) + default_namespace = converter.get('') model = ModelVisitor(self) cdata_index = 0 - - if isinstance(element_data.content, dict): - content = self.sort_content(element_data.content) - else: + if isinstance(element_data.content, dict) or kwargs.get('unordered'): + content = model.iter_unordered_content(element_data.content) + elif not isinstance(element_data.content, list): + content = [] + elif converter.losslessly: content = element_data.content + else: + content = ModelVisitor(self).iter_collapsed_content(element_data.content) for index, (name, value) in enumerate(content): if isinstance(name, int): @@ -741,71 +730,61 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): cdata_index += 1 continue - if not default_namespace or name[0] == '{': - tag = name + if self.interleave and self.interleave.is_matching(name, default_namespace, group=self): + xsd_element = self.interleave + value = get_qname(default_namespace, name), value else: - tag = '{%s}%s' % (default_namespace, name) - - while model.element is not None: - if tag in model.element.names or model.element.name is None \ - and model.element.is_matching(tag, 
default_namespace): - xsd_element = model.element - else: - for xsd_element in model.element.iter_substitutes(): - if tag in xsd_element.names: - break - else: + while model.element is not None: + xsd_element = model.element.match( + name, default_namespace, group=self, occurs=model.occurs + ) + if xsd_element is None: for particle, occurs, expected in model.advance(): errors.append((index - cdata_index, particle, occurs, expected)) continue + elif isinstance(xsd_element, XsdAnyElement): + value = get_qname(default_namespace, name), value - if isinstance(xsd_element, XsdAnyElement): - value = get_qname(default_namespace, name), value - for result in xsd_element.iter_encode(value, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - children.append(result) - - for particle, occurs, expected in model.advance(True): - errors.append((index - cdata_index, particle, occurs, expected)) - break - else: - if validation == "strict" or losslessly: - errors.append((index - cdata_index, self, 0, [])) - - for xsd_element in self.iter_elements(): - if tag in xsd_element.names or xsd_element.name is None \ - and xsd_element.is_matching(name, default_namespace): - if isinstance(xsd_element, XsdAnyElement): - value = get_qname(default_namespace, name), value - for result in xsd_element.iter_encode(value, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - children.append(result) - break + for particle, occurs, expected in model.advance(True): + errors.append((index - cdata_index, particle, occurs, expected)) + break else: - if validation != 'skip': - reason = '%r does not match any declared element of the model group.' 
% name - yield self.validation_error(validation, reason, value, **kwargs) + if self.suffix and self.suffix.is_matching(name, default_namespace, group=self): + xsd_element = self.suffix + value = get_qname(default_namespace, name), value + else: + errors.append((index - cdata_index, self, 0, [])) + for xsd_element in self.iter_elements(): + if not xsd_element.is_matching(name, default_namespace, group=self): + continue + elif isinstance(xsd_element, XsdAnyElement): + value = get_qname(default_namespace, name), value + break + else: + if validation != 'skip': + reason = '%r does not match any declared element of the model group.' % name + yield self.validation_error(validation, reason, value, **kwargs) + continue + + for result in xsd_element.iter_encode( + value, validation, converter=converter, level=level, indent=indent, **kwargs): + if isinstance(result, XMLSchemaValidationError): + yield result + else: + children.append(result) if model.element is not None: index = len(element_data.content) - cdata_index for particle, occurs, expected in model.stop(): errors.append((index, particle, occurs, expected)) - # If the validation is not strict tries to solve model errors with a reorder of the children - if errors and validation != 'strict': - children = self.sort_children(children, default_namespace) - if children: if children[-1].tail is None: children[-1].tail = padding[:-indent] or '\n' else: children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n') - if validation != 'skip' and errors: + if validation != 'skip' and (errors or not content): attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()} if validation == 'lax' and converter.etree_element_class is not etree_element: child_tags = [converter.etree_element(e.tag, attrib=e.attrib) for e in children] @@ -813,76 +792,227 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): else: elem = converter.etree_element(element_data.tag, text, children, attrib) + if not content: + 
reason = "wrong content type {!r}".format(type(element_data.content)) + yield self.validation_error(validation, reason, elem, **kwargs) + for index, particle, occurs, expected in errors: yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs) yield text, children - def update_occurs(self, counter): - """ - Update group occurrences. - - :param counter: a Counter object that trace occurrences for elements and groups. - """ - if self.model in ('sequence', 'all'): - if all(counter[item] for item in self if not item.is_emptiable()): - counter[self] += 1 - for item in self: - counter[item] = 0 - elif self.model == 'choice': - if any(counter[item] for item in self): - counter[self] += 1 - for item in self: - counter[item] = 0 - else: - raise XMLSchemaValueError("the group %r has no model!" % self) - class Xsd11Group(XsdGroup): """ - A class for XSD 1.1 model group definitions. The XSD 1.1 model groups differ - from XSD 1.0 groups for the 'all' model, that can contains also other groups. + Class for XSD 1.1 *model group* definitions. - - Content: (annotation?, (element | any | group)*) - + .. The XSD 1.1 model groups differ from XSD 1.0 groups for the 'all' model, that can contains also other groups. + .. + Content: (annotation?, (element | any | group)*) + """ - def _parse_content_model(self, elem, content_model): + def _parse_content_model(self, content_model): self.model = local_name(content_model.tag) if self.model == 'all': - if self.max_occurs != 1: + if self.max_occurs not in (0, 1): self.parse_error("maxOccurs must be (0 | 1) for 'all' model groups") if self.min_occurs not in (0, 1): self.parse_error("minOccurs must be (0 | 1) for 'all' model groups") - for child in self._iterparse_components(content_model): + for child in filter(lambda x: x.tag != XSD_ANNOTATION, content_model): if child.tag == XSD_ELEMENT: # Builds inner elements and reference groups later, for avoids circularity. 
self.append((child, self.schema)) elif child.tag == XSD_ANY: - self.append(XsdAnyElement(child, self.schema, self)) + self.append(Xsd11AnyElement(child, self.schema, self)) elif child.tag in (XSD_SEQUENCE, XSD_CHOICE, XSD_ALL): - self.append(XsdGroup(child, self.schema, self)) + self.append(Xsd11Group(child, self.schema, self)) elif child.tag == XSD_GROUP: try: ref = self.schema.resolve_qname(child.attrib['ref']) - except KeyError: - self.parse_error("missing attribute 'ref' in local group", child) + except (KeyError, ValueError, RuntimeError) as err: + if 'ref' not in child.attrib: + self.parse_error("missing attribute 'ref' in local group", child) + else: + self.parse_error(err, child) continue if ref != self.name: - self.append(XsdGroup(child, self.schema, self)) + self.append(Xsd11Group(child, self.schema, self)) + if (self.model != 'all') ^ (self[-1].model != 'all'): + msg = "an xs:%s group cannot reference to an x:%s group" + self.parse_error(msg % (self.model, self[-1].model)) + self.pop() + elif self.redefine is None: - self.parse_error("Circular definition detected for group %r:" % self.ref, elem) + self.parse_error("Circular definition detected for group %r:" % self.name) else: if child.get('minOccurs', '1') != '1' or child.get('maxOccurs', '1') != '1': self.parse_error( - "Redefined group reference cannot have minOccurs/maxOccurs other than 1:", elem + "Redefined group reference cannot have minOccurs/maxOccurs other than 1:" ) self.append(self.redefine) else: continue # Error already caught by validation against the meta-schema + + def admits_restriction(self, model): + if self.model == model or self.model == 'all': + return True + elif self.model == 'choice': + return model == 'sequence' or len(self.ref or self) <= 1 + else: + return model == 'choice' or len(self.ref or self) <= 1 + + def is_restriction(self, other, check_occurs=True): + if not self: + return True + elif not isinstance(other, ParticleMixin): + raise XMLSchemaValueError("the argument 
'base' must be a %r instance" % ParticleMixin) + elif not isinstance(other, XsdGroup): + return self.is_element_restriction(other) + elif not other: + return False + elif len(other) == other.min_occurs == other.max_occurs == 1: + if len(self) > 1: + return self.is_restriction(other[0], check_occurs) + elif self.ref is None and isinstance(self[0], XsdGroup) and self[0].is_pointless(parent=self): + return self[0].is_restriction(other[0], check_occurs) + + if other.model == 'sequence': + return self.is_sequence_restriction(other) + elif other.model == 'all': + return self.is_all_restriction(other) + elif other.model == 'choice': + return self.is_choice_restriction(other) + + def is_sequence_restriction(self, other): + if not self.has_occurs_restriction(other): + return False + + check_occurs = other.max_occurs != 0 + + item_iterator = iter(self.iter_model()) + item = next(item_iterator, None) + + for other_item in other.iter_model(): + if item is not None and item.is_restriction(other_item, check_occurs): + item = next(item_iterator, None) + elif not other_item.is_emptiable(): + break + else: + if item is None: + return True + + # Restriction check failed: try another check without removing pointless groups + item_iterator = iter(self) + item = next(item_iterator, None) + + for other_item in other.iter_model(): + if item is not None and item.is_restriction(other_item, check_occurs): + item = next(item_iterator, None) + elif not other_item.is_emptiable(): + return False + return item is None + + def is_all_restriction(self, other): + if not self.has_occurs_restriction(other): + return False + restriction_items = list(self.iter_model()) + + base_items = list(other.iter_model()) + wildcards = [] + for w1 in filter(lambda x: isinstance(x, XsdAnyElement), base_items): + for w2 in wildcards: + if w1.process_contents == w2.process_contents and w1.occurs == w2.occurs: + w2.union(w1) + w2.extended = True + break + else: + wildcards.append(w1.copy()) + + base_items.extend(w for 
w in wildcards if hasattr(w, 'extended')) + + for other_item in base_items: + min_occurs, max_occurs = 0, other_item.max_occurs + for k in range(len(restriction_items) - 1, -1, -1): + item = restriction_items[k] + + if item.is_restriction(other_item, check_occurs=False): + if max_occurs is None: + min_occurs += item.min_occurs + elif item.max_occurs is None or max_occurs < item.max_occurs or \ + min_occurs + item.min_occurs > max_occurs: + continue + else: + min_occurs += item.min_occurs + max_occurs -= item.max_occurs + + restriction_items.remove(item) + if not min_occurs or max_occurs == 0: + break + + if min_occurs < other_item.min_occurs: + break + else: + if not restriction_items: + return True + + # Restriction check failed: try another check in case of a choice group + if self.model != 'choice': + return False + return all(x.is_restriction(other) for x in self) + + def is_choice_restriction(self, other): + restriction_items = list(self.iter_model()) + if self.model == 'choice': + counter_func = max + else: + def counter_func(x, y): + return x + y + + check_occurs = other.max_occurs != 0 + max_occurs = 0 + other_max_occurs = 0 + + for other_item in other.iter_model(): + for item in restriction_items: + if other_item is item or item.is_restriction(other_item, check_occurs): + if max_occurs is not None: + if item.effective_max_occurs is None: + max_occurs = None + else: + max_occurs = counter_func(max_occurs, item.effective_max_occurs) + + if other_max_occurs is not None: + if other_item.effective_max_occurs is None: + other_max_occurs = None + else: + other_max_occurs = max(other_max_occurs, other_item.effective_max_occurs) + break + else: + continue + restriction_items.remove(item) + + if restriction_items: + return False + elif other_max_occurs is None: + if other.max_occurs != 0: + return True + other_max_occurs = 0 + elif other.max_occurs is None: + if other_max_occurs != 0: + return True + other_max_occurs = 0 + else: + other_max_occurs *= 
other.max_occurs + + if max_occurs is None: + return self.max_occurs == 0 + elif self.max_occurs is None: + return max_occurs == 0 + else: + return other_max_occurs >= max_occurs * self.max_occurs diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 8c1cd7a..1e51d95 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -17,8 +17,8 @@ from collections import Counter from elementpath import Selector, XPath1Parser, ElementPathError from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_UNIQUE, XSD_KEY, XSD_KEYREF, XSD_SELECTOR, XSD_FIELD -from ..helpers import get_qname, qname_to_prefixed +from ..qnames import XSD_ANNOTATION, XSD_QNAME, XSD_UNIQUE, XSD_KEY, XSD_KEYREF, \ + XSD_SELECTOR, XSD_FIELD, get_qname, qname_to_prefixed, qname_to_extended from ..etree import etree_getpath from ..regex import get_python_regex @@ -44,7 +44,8 @@ XsdIdentityXPathParser.build_tokenizer() class XsdSelector(XsdComponent): - _admitted_tags = {XSD_SELECTOR} + """Class for defining an XPath selector for an XSD identity constraint.""" + _ADMITTED_TAGS = {XSD_SELECTOR} pattern = re.compile(get_python_regex( r"(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*(\|" r"(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*)*" @@ -86,7 +87,8 @@ class XsdSelector(XsdComponent): class XsdFieldSelector(XsdSelector): - _admitted_tags = {XSD_FIELD} + """Class for defining an XPath field selector for an XSD identity constraint.""" + _ADMITTED_TAGS = {XSD_FIELD} pattern = re.compile(get_python_regex( r"(\.//)?((((child::)?((\i\c*:)?(\i\c*|\*)))|\.)/)*((((child::)?((\i\c*:)?(\i\c*|\*)))|\.)|" r"((attribute::|@)((\i\c*:)?(\i\c*|\*))))(\|(\.//)?((((child::)?((\i\c*:)?(\i\c*|\*)))|\.)/)*" @@ -95,6 +97,15 @@ class XsdFieldSelector(XsdSelector): class XsdIdentity(XsdComponent): + """ + Common class for XSD identity constraints. 
+ + :ivar selector: the XPath selector of the identity constraint. + :ivar fields: a list containing the XPath field selectors of the identity constraint. + """ + selector = None + fields = () + def __init__(self, elem, schema, parent): super(XsdIdentity, self).__init__(elem, schema, parent) @@ -107,54 +118,75 @@ class XsdIdentity(XsdComponent): self.parse_error("missing required attribute 'name'", elem) self.name = None - child = self._parse_component(elem, required=False, strict=False) - if child is None or child.tag != XSD_SELECTOR: - self.parse_error("missing 'selector' declaration.", elem) - self.selector = None + for index, child in enumerate(elem): + if child.tag == XSD_SELECTOR: + self.selector = XsdSelector(child, self.schema, self) + break + elif child.tag != XSD_ANNOTATION: + self.parse_error("'selector' declaration expected.", elem) + break else: - self.selector = XsdSelector(child, self.schema, self) + self.parse_error("missing 'selector' declaration.", elem) + index = -1 self.fields = [] - for child in self._iterparse_components(elem, start=int(self.selector is not None)): + for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem[index + 1:]): if child.tag == XSD_FIELD: self.fields.append(XsdFieldSelector(child, self.schema, self)) else: self.parse_error("element %r not allowed here:" % child.tag, elem) + def _parse_identity_reference(self): + super(XsdIdentity, self)._parse() + self.name = get_qname(self.target_namespace, self.elem.attrib['ref']) + if 'name' in self.elem.attrib: + self.parse_error("attributes 'name' and 'ref' are mutually exclusive") + elif self._parse_child_component(self.elem) is not None: + self.parse_error("a reference cannot has child definitions") + def iter_elements(self): for xsd_element in self.selector.xpath_selector.iter_select(self.parent): yield xsd_element - def get_fields(self, context, decoders=None): + def get_fields(self, context, namespaces=None, decoders=None): """ Get fields for a schema or instance context 
element. - :param context: Context Element or XsdElement - :param decoders: Context schema fields decoders. - :return: A tuple with field values. An empty field is replaced by `None`. + :param context: context Element or XsdElement + :param namespaces: is an optional mapping from namespace prefix to URI. + :param decoders: context schema fields decoders. + :return: a tuple with field values. An empty field is replaced by `None`. """ fields = [] for k, field in enumerate(self.fields): result = field.xpath_selector.select(context) if not result: - if isinstance(self, XsdKey): - raise XMLSchemaValueError("%r key field must have a value!" % field) - else: + if not isinstance(self, XsdKey) or 'ref' in context.attrib and \ + self.schema.meta_schema is None and self.schema.XSD_VERSION != '1.0': fields.append(None) + else: + raise XMLSchemaValueError("%r key field must have a value!" % field) elif len(result) == 1: if decoders is None or decoders[k] is None: fields.append(result[0]) else: - fields.append(decoders[k].decode(result[0], validation="skip")) + value = decoders[k].data_value(result[0]) + if decoders[k].type.root_type.name == XSD_QNAME: + value = qname_to_extended(value, namespaces) + if isinstance(value, list): + fields.append(tuple(value)) + else: + fields.append(value) else: raise XMLSchemaValueError("%r field selects multiple values!" % field) return tuple(fields) - def iter_values(self, elem): + def iter_values(self, elem, namespaces): """ Iterate field values, excluding empty values (tuples with all `None` values). - :param elem: Instance XML element. + :param elem: instance XML element. + :param namespaces: XML document namespaces. :return: N-Tuple with value fields. 
""" current_path = '' @@ -165,13 +197,15 @@ class XsdIdentity(XsdComponent): # Change the XSD context only if the path is changed current_path = path xsd_element = self.parent.find(path) + if not hasattr(xsd_element, 'tag'): + yield XMLSchemaValidationError(self, e, "{!r} is not an element".format(xsd_element)) xsd_fields = self.get_fields(xsd_element) if all(fld is None for fld in xsd_fields): continue try: - fields = self.get_fields(e, decoders=xsd_fields) + fields = self.get_fields(e, namespaces, decoders=xsd_fields) except XMLSchemaValueError as err: yield XMLSchemaValidationError(self, e, reason=str(err)) else: @@ -180,24 +214,11 @@ class XsdIdentity(XsdComponent): @property def built(self): - return self.selector.built and all([f.built for f in self.fields]) + return self.selector is not None - @property - def validation_attempted(self): - if self.built: - return 'full' - elif self.selector.built or any([f.built for f in self.fields]): - return 'partial' - else: - return 'none' - - def __call__(self, *args, **kwargs): - for error in self.validator(*args, **kwargs): - yield error - - def validator(self, elem): + def __call__(self, elem, namespaces): values = Counter() - for v in self.iter_values(elem): + for v in self.iter_values(elem, namespaces): if isinstance(v, XMLSchemaValidationError): yield v else: @@ -209,11 +230,11 @@ class XsdIdentity(XsdComponent): class XsdUnique(XsdIdentity): - _admitted_tags = {XSD_UNIQUE} + _ADMITTED_TAGS = {XSD_UNIQUE} class XsdKey(XsdIdentity): - _admitted_tags = {XSD_KEY} + _ADMITTED_TAGS = {XSD_KEY} class XsdKeyref(XsdIdentity): @@ -223,7 +244,7 @@ class XsdKeyref(XsdIdentity): :ivar refer: reference to a *xs:key* declaration that must be in the same element \ or in a descendant element. """ - _admitted_tags = {XSD_KEYREF} + _ADMITTED_TAGS = {XSD_KEYREF} refer = None refer_path = '.' 
@@ -236,10 +257,11 @@ class XsdKeyref(XsdIdentity): super(XsdKeyref, self)._parse() try: self.refer = self.schema.resolve_qname(self.elem.attrib['refer']) - except KeyError: - self.parse_error("missing required attribute 'refer'") - except ValueError as err: - self.parse_error(err) + except (KeyError, ValueError, RuntimeError) as err: + if 'refer' not in self.elem.attrib: + self.parse_error("missing required attribute 'refer'") + else: + self.parse_error(err) def parse_refer(self): if self.refer is None: @@ -247,11 +269,12 @@ class XsdKeyref(XsdIdentity): elif isinstance(self.refer, (XsdKey, XsdUnique)): return # referenced key/unique identity constraint already set - try: - self.refer = self.parent.constraints[self.refer] - except KeyError: + refer = self.parent.identities.get(self.refer) + if refer is not None and refer.ref is None: + self.refer = refer + else: try: - self.refer = self.maps.constraints[self.refer] + self.refer = self.maps.identities[self.refer] except KeyError: self.parse_error("key/unique identity constraint %r is missing" % self.refer) return @@ -274,27 +297,31 @@ class XsdKeyref(XsdIdentity): self.refer_path = refer_path - def get_refer_values(self, elem): + @property + def built(self): + return self.selector is not None and isinstance(self.refer, XsdIdentity) + + def get_refer_values(self, elem, namespaces): values = set() for e in elem.iterfind(self.refer_path): - for v in self.refer.iter_values(e): + for v in self.refer.iter_values(e, namespaces): if not isinstance(v, XMLSchemaValidationError): values.add(v) return values - def validator(self, elem): + def __call__(self, elem, namespaces): if self.refer is None: return refer_values = None - for v in self.iter_values(elem): + for v in self.iter_values(elem, namespaces): if isinstance(v, XMLSchemaValidationError): yield v continue if refer_values is None: try: - refer_values = self.get_refer_values(elem) + refer_values = self.get_refer_values(elem, namespaces) except XMLSchemaValueError as 
err: yield XMLSchemaValidationError(self, elem, str(err)) continue @@ -303,3 +330,33 @@ class XsdKeyref(XsdIdentity): reason = "Key {!r} with value {!r} not found for identity constraint of element {!r}." \ .format(self.prefixed_name, v, qname_to_prefixed(elem.tag, self.namespaces)) yield XMLSchemaValidationError(validator=self, obj=elem, reason=reason) + + +class Xsd11Unique(XsdUnique): + + def _parse(self): + if self._parse_reference(): + super(XsdIdentity, self)._parse() + self.ref = True + else: + super(Xsd11Unique, self)._parse() + + +class Xsd11Key(XsdKey): + + def _parse(self): + if self._parse_reference(): + super(XsdIdentity, self)._parse() + self.ref = True + else: + super(Xsd11Key, self)._parse() + + +class Xsd11Keyref(XsdKeyref): + + def _parse(self): + if self._parse_reference(): + super(XsdIdentity, self)._parse() + self.ref = True + else: + super(Xsd11Keyref, self)._parse() diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py index 40dec63..7a904f4 100644 --- a/xmlschema/validators/models.py +++ b/xmlschema/validators/models.py @@ -12,12 +12,13 @@ This module contains classes and functions for processing XSD content models. """ from __future__ import unicode_literals -from collections import Counter +from collections import defaultdict, deque, Counter from ..compat import PY3, MutableSequence from ..exceptions import XMLSchemaValueError from .exceptions import XMLSchemaModelError, XMLSchemaModelDepthError from .xsdbase import ParticleMixin +from .wildcards import XsdAnyElement, Xsd11AnyElement MAX_MODEL_DEPTH = 15 """Limit depth for safe visiting of models""" @@ -30,6 +31,8 @@ class ModelGroup(MutableSequence, ParticleMixin): Class for XSD model group particles. This class implements only model related methods, schema element parsing and validation methods are implemented in derived classes. 
""" + parent = None + def __init__(self, model): assert model in XSD_GROUP_MODELS, "Not a valid value for 'model'" self._group = [] @@ -72,9 +75,9 @@ class ModelGroup(MutableSequence, ParticleMixin): def is_emptiable(self): if self.model == 'choice': - return self.min_occurs == 0 or not self or any([item.is_emptiable() for item in self]) + return self.min_occurs == 0 or not self or any(item.is_emptiable() for item in self) else: - return self.min_occurs == 0 or not self or all([item.is_emptiable() for item in self]) + return self.min_occurs == 0 or not self or all(item.is_emptiable() for item in self) def is_empty(self): return not self._group or self.max_occurs == 0 @@ -108,6 +111,25 @@ class ModelGroup(MutableSequence, ParticleMixin): else: return True + @property + def effective_min_occurs(self): + if self.model == 'choice': + return min(e.min_occurs for e in self.iter_model()) + return self.min_occurs * min(e.min_occurs for e in self.iter_model()) + + @property + def effective_max_occurs(self): + if self.max_occurs == 0: + return 0 + elif self.max_occurs is None: + return None if any(e.max_occurs != 0 for e in self.iter_model()) else 0 + elif any(e.max_occurs is None for e in self.iter_model()): + return None + elif self.model == 'choice': + return self.max_occurs * max(e.max_occurs for e in self.iter_model()) + else: + return self.max_occurs * sum(e.max_occurs for e in self.iter_model()) + def has_occurs_restriction(self, other): if not self: return True @@ -174,19 +196,11 @@ class ModelGroup(MutableSequence, ParticleMixin): else: yield item - def iter_subelements(self, depth=0): - if depth <= MAX_MODEL_DEPTH: - for item in self: - if isinstance(item, ModelGroup): - for e in item.iter_subelements(depth + 1): - yield e - else: - yield item - def check_model(self): """ - Checks if the model group is deterministic. Types matching of same elements and Unique Particle - Attribution Constraint are checked. 
Raises an `XMLSchemaModelError` at first violated constraint. + Checks if the model group is deterministic. Element Declarations Consistent and + Unique Particle Attribution constraints are checked. + :raises: an `XMLSchemaModelError` at first violated constraint. """ def safe_iter_path(group, depth): if depth > MAX_MODEL_DEPTH: @@ -202,25 +216,45 @@ class ModelGroup(MutableSequence, ParticleMixin): paths = {} current_path = [self] + try: + any_element = self.parent.open_content.any_element + except AttributeError: + any_element = None + for e in safe_iter_path(self, 0): for pe, previous_path in paths.values(): - if pe.name == e.name and pe.name is not None and pe.type is not e.type: - raise XMLSchemaModelError( - self, "The model has elements with the same name %r but a different type" % e.name - ) - elif not pe.overlap(e): + # EDC check + if not e.is_consistent(pe) or any_element and not any_element.is_consistent(pe): + msg = "Element Declarations Consistent violation between %r and %r: " \ + "match the same name but with different types" % (e, pe) + raise XMLSchemaModelError(self, msg) + + # UPA check + if pe is e or not pe.is_overlap(e): continue - elif pe is not e and pe.parent is e.parent: + elif pe.parent is e.parent: if pe.parent.model in {'all', 'choice'}: - msg = "{!r} and {!r} overlap and are in the same {!r} group" - raise XMLSchemaModelError(self, msg.format(pe, e, pe.parent.model)) + if isinstance(pe, Xsd11AnyElement) and not isinstance(e, XsdAnyElement): + pe.add_precedence(e, self) + elif isinstance(e, Xsd11AnyElement) and not isinstance(pe, XsdAnyElement): + e.add_precedence(pe, self) + else: + msg = "{!r} and {!r} overlap and are in the same {!r} group" + raise XMLSchemaModelError(self, msg.format(pe, e, pe.parent.model)) elif pe.min_occurs == pe.max_occurs: continue - if not distinguishable_paths(previous_path + [pe], current_path + [e]): + if distinguishable_paths(previous_path + [pe], current_path + [e]): + continue + elif isinstance(pe, 
Xsd11AnyElement) and not isinstance(e, XsdAnyElement): + pe.add_precedence(e, self) + elif isinstance(e, Xsd11AnyElement) and not isinstance(pe, XsdAnyElement): + e.add_precedence(pe, self) + else: raise XMLSchemaModelError( self, "Unique Particle Attribution violation between {!r} and {!r}".format(pe, e) ) + paths[e.name] = e, current_path[:] @@ -255,17 +289,29 @@ def distinguishable_paths(path1, path2): for k in range(depth + 1, len(path1) - 1): univocal1 &= path1[k].is_univocal() + idx = path1[k].index(path1[k + 1]) if path1[k].model == 'sequence': - idx = path1[k].index(path1[k + 1]) before1 |= any(not e.is_emptiable() for e in path1[k][:idx]) after1 |= any(not e.is_emptiable() for e in path1[k][idx + 1:]) + elif path1[k].model in ('all', 'choice'): + if any(e.is_emptiable() for e in path1[k] if e is not path1[k][idx]): + univocal1 = before1 = after1 = False + else: + if len(path2[k]) > 1 and all(e.is_emptiable() for e in path1[k] if e is not path1[k][idx]): + univocal1 = before1 = after1 = False for k in range(depth + 1, len(path2) - 1): univocal2 &= path2[k].is_univocal() + idx = path2[k].index(path2[k + 1]) if path2[k].model == 'sequence': - idx = path2[k].index(path2[k + 1]) before2 |= any(not e.is_emptiable() for e in path2[k][:idx]) after2 |= any(not e.is_emptiable() for e in path2[k][idx + 1:]) + elif path2[k].model in ('all', 'choice'): + if any(e.is_emptiable() for e in path2[k] if e is not path2[k][idx]): + univocal2 = before2 = after2 = False + else: + if len(path2[k]) > 1 and all(e.is_emptiable() for e in path2[k] if e is not path2[k][idx]): + univocal2 = before2 = after2 = False if path1[depth].model != 'sequence': return before1 and before2 or \ @@ -288,10 +334,8 @@ class ModelVisitor(MutableSequence): :param root: the root ModelGroup instance of the model. :ivar occurs: the Counter instance for keeping track of occurrences of XSD elements and groups. :ivar element: the current XSD element, initialized to the first element of the model. 
- :ivar broken: a boolean value that records if the model is still usable. :ivar group: the current XSD model group, initialized to *root* argument. - :ivar iterator: the current XSD group iterator. - :ivar items: the current XSD group unmatched items. + :ivar items: the current XSD group's items iterator. :ivar match: if the XSD group has an effective item match. """ def __init__(self, root): @@ -299,8 +343,7 @@ class ModelVisitor(MutableSequence): self.occurs = Counter() self._subgroups = [] self.element = None - self.broken = False - self.group, self.iterator, self.items, self.match = root, iter(root), root[::-1], False + self.group, self.items, self.match = root, iter(root), False self._start() def __str__(self): @@ -336,18 +379,17 @@ class ModelVisitor(MutableSequence): del self._subgroups[:] self.occurs.clear() self.element = None - self.broken = False - self.group, self.iterator, self.items, self.match = self.root, iter(self.root), self.root[::-1], False + self.group, self.items, self.match = self.root, iter(self.root), False def _start(self): while True: - item = next(self.iterator, None) + item = next(self.items, None) if item is None or not isinstance(item, ModelGroup): self.element = item break elif item: - self.append((self.group, self.iterator, self.items, self.match)) - self.group, self.iterator, self.items, self.match = item, iter(item), item[::-1], False + self.append((self.group, self.items, self.match)) + self.group, self.items, self.match = item, iter(item), False @property def expected(self): @@ -355,12 +397,19 @@ class ModelVisitor(MutableSequence): Returns the expected elements of the current and descendant groups. 
""" expected = [] - for item in reversed(self.items): - if isinstance(item, ModelGroup): - expected.extend(item.iter_elements()) + if self.group.model == 'choice': + items = self.group + elif self.group.model == 'all': + items = (e for e in self.group if e.min_occurs > self.occurs[e]) + else: + items = (e for e in self.group if e.min_occurs > self.occurs[e]) + + for e in items: + if isinstance(e, ModelGroup): + expected.extend(e.iter_elements()) else: - expected.append(item) - expected.extend(item.maps.substitution_groups.get(item.name, ())) + expected.append(e) + expected.extend(e.maps.substitution_groups.get(e.name, ())) return expected def restart(self): @@ -387,7 +436,7 @@ class ModelVisitor(MutableSequence): or for the current group, `False` otherwise. """ if isinstance(item, ModelGroup): - self.group, self.iterator, self.items, self.match = self.pop() + self.group, self.items, self.match = self.pop() item_occurs = occurs[item] model = self.group.model @@ -396,29 +445,21 @@ class ModelVisitor(MutableSequence): if model == 'choice': occurs[item] = 0 occurs[self.group] += 1 - self.iterator, self.match = iter(self.group), False - else: - if model == 'all': - self.items.remove(item) - else: - self.items.pop() - if not self.items: - self.occurs[self.group] += 1 + self.items, self.match = iter(self.group), False + elif model == 'sequence' and item is self.group[-1]: + self.occurs[self.group] += 1 return item.is_missing(item_occurs) elif model == 'sequence': if self.match: - self.items.pop() - if not self.items: + if item is self.group[-1]: occurs[self.group] += 1 return not item.is_emptiable() elif item.is_emptiable(): - self.items.pop() return False elif self.group.min_occurs <= occurs[self.group] or self: return stop_item(self.group) else: - self.items.pop() return True element, occurs = self.element, self.occurs @@ -430,6 +471,8 @@ class ModelVisitor(MutableSequence): self.match = True if not element.is_over(occurs[element]): return + + obj = None try: if 
stop_item(element): yield element, occurs[element], [element] @@ -438,32 +481,250 @@ class ModelVisitor(MutableSequence): while self.group.is_over(occurs[self.group]): stop_item(self.group) - obj = next(self.iterator, None) + obj = next(self.items, None) if obj is None: if not self.match: - if self.group.model == 'all' and all(e.min_occurs == 0 for e in self.items): - occurs[self.group] += 1 - group, expected = self.group, self.items + if self.group.model == 'all': + for e in self.group: + occurs[e] = occurs[(e,)] + if all(e.min_occurs <= occurs[e] for e in self.group): + occurs[self.group] = 1 + group, expected = self.group, self.expected if stop_item(group) and expected: - yield group, occurs[group], self.expected - elif not self.items: - self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False - elif self.group.model == 'all': - self.iterator, self.match = iter(self.items), False - elif all(e.min_occurs == 0 for e in self.items): - self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False - occurs[self.group] += 1 + yield group, occurs[group], expected + elif self.group.model != 'all': + self.items, self.match = iter(self.group), False + elif any(not e.is_over(occurs[e]) for e in self.group): + for e in self.group: + occurs[(e,)] += occurs[e] + self.items, self.match = (e for e in self.group if not e.is_over(occurs[e])), False + else: + for e in self.group: + occurs[(e,)] += occurs[e] + occurs[self.group] = 1 elif not isinstance(obj, ModelGroup): # XsdElement or XsdAnyElement self.element, occurs[obj] = obj, 0 return - elif obj: - self.append((self.group, self.iterator, self.items, self.match)) - self.group, self.iterator, self.items, self.match = obj, iter(obj), obj[::-1], False + else: + self.append((self.group, self.items, self.match)) + self.group, self.items, self.match = obj, iter(obj), False occurs[obj] = 0 + if obj.model == 'all': + for e in obj: + occurs[(e,)] = 0 except IndexError: + # Model visit ended 
self.element = None - if self.group.is_missing(occurs[self.group]) and self.items: - yield self.group, occurs[self.group], self.expected + if self.group.is_missing(occurs[self.group]): + if self.group.model == 'choice': + yield self.group, occurs[self.group], self.expected + elif self.group.model == 'sequence': + if obj is not None: + yield self.group, occurs[self.group], self.expected + elif any(e.min_occurs > occurs[e] for e in self.group): + yield self.group, occurs[self.group], self.expected + + def sort_content(self, content, restart=True): + if restart: + self.restart() + return [(name, value) for name, value in self.iter_unordered_content(content)] + + def iter_unordered_content(self, content): + """ + Takes an unordered content stored in a dictionary of lists and yields the + content elements sorted with the ordering defined by the model. Character + data parts are yielded at start and between child elements. + + Ordering is inferred from ModelVisitor instance with any elements that + don't fit the schema placed at the end of the returned sequence. Checking + the yielded content validity is the responsibility of method *iter_encode* + of class :class:`XsdGroup`. + + :param content: a dictionary of element names to list of element contents \ + or an iterable composed of couples of name and value. In case of a \ + dictionary the values must be lists where each item is the content \ + of a single element. + :return: yields of a sequence of the Element being encoded's children.
+ """ + if isinstance(content, dict): + cdata_content = sorted(((k, v) for k, v in content.items() if isinstance(k, int)), reverse=True) + consumable_content = {k: iter(v) for k, v in content.items() if not isinstance(k, int)} + else: + cdata_content = sorted(((k, v) for k, v in content if isinstance(k, int)), reverse=True) + consumable_content = defaultdict(list) + for k, v in filter(lambda x: not isinstance(x[0], int), content): + consumable_content[k].append(v) + consumable_content = {k: iter(v) for k, v in consumable_content.items()} + + if cdata_content: + yield cdata_content.pop() + + while self.element is not None and consumable_content: + for name in consumable_content: + if self.element.is_matching(name): + try: + yield name, next(consumable_content[name]) + except StopIteration: + del consumable_content[name] + for _ in self.advance(False): + pass + else: + if cdata_content: + yield cdata_content.pop() + break + else: + # Consume the return of advance otherwise we get stuck in an infinite loop. + for _ in self.advance(False): + pass + + # Add the remaining consumable content onto the end of the data. + for name, values in consumable_content.items(): + for v in values: + yield name, v + if cdata_content: + yield cdata_content.pop() + + while cdata_content: + yield cdata_content.pop() + + def iter_collapsed_content(self, content): + """ + Iterates a content stored in a sequence of couples *(name, value)*, yielding + items in the same order of the sequence, except for repetitions of the same + tag that don't match with the current element of the :class:`ModelVisitor` + instance. These items are included in an unsorted buffer and yielded asap + when there is a match with the model's element or at the end of the iteration. + + This iteration mode, in cooperation with the method *iter_encode* of the class + XsdGroup, facilitates the encoding of content formatted with a convention that + collapses the children with the same tag into a list (eg. BadgerFish). 
+ + :param content: an iterable containing couples of names and values. + :return: yields of a sequence of the Element being encoded's children. + """ + prev_name = None + unordered_content = defaultdict(deque) + + for name, value in content: + if isinstance(name, int) or self.element is None: + yield name, value + continue + + while self.element is not None: + if self.element.is_matching(name): + yield name, value + prev_name = name + for _ in self.advance(True): + pass + break + + for key in unordered_content: + if self.element.is_matching(key): + break + else: + if prev_name == name: + unordered_content[name].append(value) + break + + for _ in self.advance(False): + pass + continue + + try: + yield key, unordered_content[key].popleft() + except IndexError: + del unordered_content[key] + else: + for _ in self.advance(True): + pass + else: + yield name, value + prev_name = name + + # Add the remaining consumable content onto the end of the data. + for name, values in unordered_content.items(): + for v in values: + yield name, v + + +class Occurrence(object): + """ + Class for XSD particles occurrence counting and comparison. 
+ """ + def __init__(self, occurs): + self.occurs = occurs + + def add(self, occurs): + if self.occurs is None: + pass + elif occurs is None: + self.occurs = None + else: + self.occurs += occurs + + def sub(self, occurs): + if self.occurs is None: + pass + elif occurs is None: + self.occurs = 0 + else: + self.occurs -= occurs + + def mul(self, occurs): + if occurs == 0: + self.occurs = 0 + elif not self.occurs: + pass + elif occurs is None: + self.occurs = None + else: + self.occurs *= occurs + + def max(self, occurs): + if self.occurs is None: + pass + elif occurs is None: + self.occurs = occurs + else: + self.occurs = max(self.occurs, occurs) + + def __eq__(self, occurs): + return self.occurs == occurs + + def __ne__(self, occurs): + return self.occurs != occurs + + def __ge__(self, occurs): + if self.occurs is None: + return True + elif occurs is None: + return False + else: + return self.occurs >= occurs + + def __gt__(self, occurs): + if self.occurs is None: + return True + elif occurs is None: + return False + else: + return self.occurs > occurs + + def __le__(self, occurs): + if occurs is None: + return True + elif self.occurs is None: + return False + else: + return self.occurs <= occurs + + def __lt__(self, occurs): + if occurs is None: + return True + elif self.occurs is None: + return False + else: + return self.occurs < occurs diff --git a/xmlschema/validators/notations.py b/xmlschema/validators/notations.py index 2f21c04..05efe52 100644 --- a/xmlschema/validators/notations.py +++ b/xmlschema/validators/notations.py @@ -10,32 +10,24 @@ # from __future__ import unicode_literals -from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_NOTATION -from ..helpers import get_qname - +from ..qnames import XSD_NOTATION, get_qname from .xsdbase import XsdComponent class XsdNotation(XsdComponent): """ - Class for XSD 'notation' declarations. + Class for XSD *notation* declarations. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = {XSD_NOTATION} - - def __init__(self, elem, schema, parent): - if parent is not None: - raise XMLSchemaValueError("'parent' attribute is not None but %r must be global!" % self) - super(XsdNotation, self).__init__(elem, schema, parent) + _ADMITTED_TAGS = {XSD_NOTATION} @property def built(self): @@ -43,15 +35,15 @@ class XsdNotation(XsdComponent): def _parse(self): super(XsdNotation, self)._parse() - if not self.is_global: - self.parse_error("a notation declaration must be global.", self.elem) + if self.parent is not None: + self.parse_error("a notation declaration must be global", self.elem) try: self.name = get_qname(self.target_namespace, self.elem.attrib['name']) except KeyError: - self.parse_error("a notation must have a 'name'.", self.elem) + self.parse_error("a notation must have a 'name' attribute", self.elem) if 'public' not in self.elem.attrib and 'system' not in self.elem.attrib: - self.parse_error("a notation must has a 'public' or a 'system' attribute.", self.elem) + self.parse_error("a notation must has a 'public' or a 'system' attribute", self.elem) @property def public(self): diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index e6f8768..321809f 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -14,51 +14,51 @@ This module contains XMLSchema classes creator for xmlschema package. Two schema classes are created at the end of this module, XMLSchema10 for XSD 1.0 and XMLSchema11 for XSD 1.1. The latter class parses also XSD 1.0 schemas, as prescribed by the standard. 
- -Those are the differences between XSD 1.0 and XSD 1.1 and their current development status: - - * All model extended for content groups - * Assertions for simple types - * Default attributes for complex types - * Alternative type for elements - * Inheritable attributes - * targetNamespace for restricted element and attributes - * Assert for complex types - * TODO: OpenContent and XSD 1.1 wildcards for complex types - * schema overrides """ import os from collections import namedtuple, Counter from abc import ABCMeta +import logging import warnings +import re from ..compat import add_metaclass -from ..exceptions import XMLSchemaTypeError, XMLSchemaURLError, XMLSchemaValueError, XMLSchemaOSError -from ..qnames import XSD_SCHEMA, XSD_ANNOTATION, XSD_NOTATION, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, \ - XSD_GROUP, XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_SEQUENCE, XSD_ANY, \ - XSD_ANY_ATTRIBUTE, XSD_REDEFINE, XSD_OVERRIDE -from ..helpers import has_xsd_components, get_xsd_derivation_attribute, get_xsd_form_attribute +from ..exceptions import XMLSchemaTypeError, XMLSchemaURLError, XMLSchemaKeyError, \ + XMLSchemaValueError, XMLSchemaOSError, XMLSchemaNamespaceError +from ..qnames import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \ + VC_TYPE_UNAVAILABLE, VC_FACET_AVAILABLE, VC_FACET_UNAVAILABLE, XSD_SCHEMA, \ + XSD_ANNOTATION, XSD_NOTATION, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_GROUP, \ + XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_SEQUENCE, XSD_CHOICE, \ + XSD_ALL, XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_INCLUDE, XSD_IMPORT, XSD_REDEFINE, \ + XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT +from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute from ..namespaces import XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, XHTML_NAMESPACE, \ - XLINK_NAMESPACE, NamespaceResourcesMap, NamespaceView -from ..etree import etree_element, etree_tostring, ParseError + XLINK_NAMESPACE, VC_NAMESPACE, NamespaceResourcesMap, NamespaceView +from ..etree import 
etree_element, etree_tostring, prune_etree, ParseError from ..resources import is_remote_url, url_path_is_file, fetch_resource, XMLResource from ..converters import XMLSchemaConverter -from ..xpath import ElementPathMixin +from ..xpath import XMLSchemaProxy, ElementPathMixin from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, XMLSchemaEncodeError, \ XMLSchemaNotBuiltError, XMLSchemaIncludeWarning, XMLSchemaImportWarning from .xsdbase import XSD_VALIDATION_MODES, XsdValidator, ValidationMixin, XsdComponent from .notations import XsdNotation +from .identities import XsdKey, XsdKeyref, XsdUnique, Xsd11Key, Xsd11Unique, Xsd11Keyref +from .facets import XSD_11_FACETS from .simple_types import xsd_simple_type_factory, XsdUnion, XsdAtomicRestriction, \ Xsd11AtomicRestriction, Xsd11Union from .attributes import XsdAttribute, XsdAttributeGroup, Xsd11Attribute from .complex_types import XsdComplexType, Xsd11ComplexType from .groups import XsdGroup, Xsd11Group from .elements import XsdElement, Xsd11Element -from .wildcards import XsdAnyElement, XsdAnyAttribute, Xsd11AnyElement, Xsd11AnyAttribute -from .globals_ import iterchildren_xsd_import, iterchildren_xsd_include, \ - iterchildren_xsd_redefine, iterchildren_xsd_override, XsdGlobals +from .wildcards import XsdAnyElement, XsdAnyAttribute, Xsd11AnyElement, \ + Xsd11AnyAttribute, XsdDefaultOpenContent +from .globals_ import XsdGlobals +logger = logging.getLogger('xmlschema') +logging.basicConfig(format='[%(levelname)s] %(message)s') + +XSD_VERSION_PATTERN = re.compile(r'^\d+\.\d+$') # Elements for building dummy groups ATTRIBUTE_GROUP_ELEMENT = etree_element(XSD_ATTRIBUTE_GROUP) @@ -75,11 +75,13 @@ ANY_ELEMENT = etree_element( 'maxOccurs': 'unbounded' }) +# XSD schemas of W3C standards SCHEMAS_DIR = os.path.join(os.path.dirname(__file__), 'schemas/') XML_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xml_minimal.xsd') -HFP_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-hasFacetAndProperty_minimal.xsd') 
XSI_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-instance_minimal.xsd') XLINK_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xlink.xsd') +XHTML_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xhtml1-strict.xsd') +VC_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-versioning_minimal.xsd') class XMLSchemaMeta(ABCMeta): @@ -127,7 +129,6 @@ class XMLSchemaMeta(ABCMeta): # Build the new meta-schema instance schema_location = meta_schema.url if isinstance(meta_schema, XMLSchemaBase) else meta_schema meta_schema = meta_schema_class.create_meta_schema(schema_location) - meta_schema.maps.build() dict_['meta_schema'] = meta_schema return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_) @@ -157,8 +158,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :param converter: is an optional argument that can be an :class:`XMLSchemaConverter` \ subclass or instance, used for defining the default XML data converter for XML Schema instance. :type converter: XMLSchemaConverter or None - :param locations: schema location hints for namespace imports. Can be a dictionary or \ - a sequence of couples (namespace URI, resource URL). + :param locations: schema location hints, that can include additional namespaces to \ + import after processing schema's import statements. Usually filled with the couples \ + (namespace, url) extracted from xsi:schemaLocations. Can be a dictionary or a sequence \ + of couples (namespace URI, resource URL). :type locations: dict or list or None :param base_url: is an optional base URL, used for the normalization of relative paths \ when the URL of the schema resource can't be obtained from the source argument. @@ -174,6 +177,11 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): meta-schema is added at the end. In the latter case the meta-schema is rebuilt if any base \ namespace has been overridden by an import. Ignored if the argument *global_maps* is provided. 
:type use_meta: bool + :param loglevel: for setting a different logging level for schema initialization \ + and building. The default is WARNING (30). Set it to 20 for INFO level, or to \ + 10 for DEBUG level. The default loglevel is restored after schema building, \ + when exiting the initialization method. + :type loglevel: int :cvar XSD_VERSION: store the XSD version (1.0 or 1.1). :vartype XSD_VERSION: str @@ -185,6 +193,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :vartype BUILDERS_MAP: dict :cvar BASE_SCHEMAS: a dictionary from namespace to schema resource for meta-schema bases. :vartype BASE_SCHEMAS: dict + :cvar FALLBACK_LOCATIONS: fallback schema location hints for other standard namespaces. + :vartype FALLBACK_LOCATIONS: dict :cvar meta_schema: the XSD meta-schema instance. :vartype meta_schema: XMLSchema :cvar attribute_form_default: the schema's *attributeFormDefault* attribute, defaults to 'unqualified'. @@ -210,7 +220,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :vartype maps: XsdGlobals :ivar converter: the default converter used for XML data decoding/encoding. :vartype converter: XMLSchemaConverter - :ivar locations: schema location hints. + :ivar locations: schemas location hints. :vartype locations: NamespaceResourcesMap :ivar namespaces: a dictionary that maps from the prefixes used by the schema into namespace URI.
:vartype namespaces: dict @@ -240,6 +250,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): BUILDERS = None BUILDERS_MAP = None BASE_SCHEMAS = None + FALLBACK_LOCATIONS = None meta_schema = None # Schema defaults @@ -248,12 +259,25 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): element_form_default = 'unqualified' block_default = '' final_default = '' - default_attributes = None # for XSD 1.1 + redefine = None - def __init__(self, source, namespace=None, validation='strict', global_maps=None, converter=None, - locations=None, base_url=None, defuse='remote', timeout=300, build=True, use_meta=True): + # Additional defaults for XSD 1.1 + default_attributes = None + default_open_content = None + override = None + + def __init__(self, source, namespace=None, validation='strict', global_maps=None, + converter=None, locations=None, base_url=None, defuse='remote', + timeout=300, build=True, use_meta=True, loglevel=None): super(XMLSchemaBase, self).__init__(validation) + if loglevel is not None: + logger.setLevel(loglevel) + elif build and global_maps is None: + logger.setLevel(logging.WARNING) + self.source = XMLResource(source, base_url, defuse, timeout, lazy=False) + logger.debug("Read schema from %r", self.source) + self.imports = {} self.includes = {} self.warnings = [] @@ -283,6 +307,9 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if '' not in self.namespaces: self.namespaces[''] = namespace + logger.debug("Schema targetNamespace is %r", self.target_namespace) + logger.debug("Declared namespaces: %r", self.namespaces) + # Parses the schema defaults if 'attributeFormDefault' in root.attrib: try: @@ -297,12 +324,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.parse_error(err, root) if 'blockDefault' in root.attrib: - try: - self.block_default = get_xsd_derivation_attribute( - root, 'blockDefault', {'extension', 'restriction', 'substitution'} - ) - except 
ValueError as err: - self.parse_error(err, root) + if self.meta_schema is None: + pass # Skip XSD 1.0 meta-schema that has blockDefault="#all" + else: + try: + self.block_default = get_xsd_derivation_attribute( + root, 'blockDefault', {'extension', 'restriction', 'substitution'} + ) + except ValueError as err: + self.parse_error(err, root) if 'finalDefault' in root.attrib: try: @@ -310,31 +340,22 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): except ValueError as err: self.parse_error(err, root) - if self.XSD_VERSION > '1.0': - # XSD 1.1: "defaultAttributes" and "xpathDefaultNamespace" - self.xpath_default_namespace = self._parse_xpath_default_namespace(root) - if 'defaultAttributes' in root.attrib: - try: - self.default_attributes = self.resolve_qname(root.attrib['defaultAttributes']) - except XMLSchemaValueError as error: - self.parse_error(str(error), root) - - # Set locations hints map and converter self.locations = NamespaceResourcesMap(self.source.get_locations(locations)) - if self.meta_schema is not None: - # Add fallback schema location hint for XHTML - self.locations[XHTML_NAMESPACE] = os.path.join(SCHEMAS_DIR, 'xhtml1-strict.xsd') self.converter = self.get_converter(converter) + self.xpath_tokens = {} # Create or set the XSD global maps instance if self.meta_schema is None: self.maps = global_maps or XsdGlobals(self) - return # Meta-schemas don't need to be checked or built and don't process include/imports + for child in filter(lambda x: x.tag == XSD_OVERRIDE, self.root): + self.include_schema(child.attrib['schemaLocation'], self.base_url) + return # Meta-schemas don't need to be checked or built and don't process imports elif global_maps is None: if use_meta is False: self.maps = XsdGlobals(self, validation) - self.locations.update(self.BASE_SCHEMAS) elif self.target_namespace not in self.BASE_SCHEMAS: + if not self.meta_schema.maps.types: + self.meta_schema.maps.build() self.maps = self.meta_schema.maps.copy(self, 
validation=validation) else: base_schemas = {k: v for k, v in self.BASE_SCHEMAS.items() if k != self.target_namespace} @@ -347,21 +368,65 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): else: raise XMLSchemaTypeError("'global_maps' argument must be a %r instance." % XsdGlobals) - # Validate the schema document - if validation == 'strict': - self.check_schema(root, self.namespaces) - elif validation == 'lax': - self.errors.extend([e for e in self.meta_schema.iter_errors(root, namespaces=self.namespaces)]) + if self.XSD_VERSION > '1.0' and any(ns == VC_NAMESPACE for ns in self.namespaces.values()): + # For XSD 1.1+ apply versioning filter to schema tree. See the paragraph + # 4.2.2 of XSD 1.1 (Part 1: Structures) definition for details. + # Ref: https://www.w3.org/TR/xmlschema11-1/#cip + if prune_etree(root, selector=lambda x: not self.version_check(x)): + for k in list(root.attrib): + if k not in {'targetNamespace', VC_MIN_VERSION, VC_MAX_VERSION}: + del root.attrib[k] - # Includes and imports schemas (errors are treated as warnings) - self._include_schemas() - self._import_namespaces() + # Validate the schema document (transforming validation errors to parse errors) + if validation == 'strict': + try: + self.check_schema(root, self.namespaces) + except XMLSchemaValidationError as e: + self.parse_error(e.reason, elem=e.elem) + elif validation == 'lax': + for e in self.meta_schema.iter_errors(root, namespaces=self.namespaces): + self.parse_error(e.reason, elem=e.elem) + + self._parse_inclusions() + self._parse_imports() + + # Imports by argument (usually from xsi:schemaLocation attribute). 
+ for ns in self.locations: + if ns not in self.maps.namespaces: + self._import_namespace(ns, self.locations[ns]) if '' not in self.namespaces: self.namespaces[''] = '' # For default local names are mapped to no namespace - if build: - self.maps.build() + # XSD 1.1 default declarations (defaultAttributes, defaultOpenContent, xpathDefaultNamespace) + if self.XSD_VERSION > '1.0': + self.xpath_default_namespace = self._parse_xpath_default_namespace(root) + if 'defaultAttributes' in root.attrib: + try: + self.default_attributes = self.resolve_qname(root.attrib['defaultAttributes']) + except (ValueError, KeyError, RuntimeError) as err: + self.parse_error(str(err), root) + + for child in filter(lambda x: x.tag == XSD_DEFAULT_OPEN_CONTENT, root): + self.default_open_content = XsdDefaultOpenContent(child, self) + break + + try: + if build: + self.maps.build() + finally: + if loglevel is not None: + logger.setLevel(logging.WARNING) # Restore default logging + + def __getstate__(self): + state = self.__dict__.copy() + del state['xpath_tokens'] + state.pop('_xpath_parser', None) + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.xpath_tokens = {} def __repr__(self): if self.url: @@ -384,7 +449,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.groups = NamespaceView(value.groups, self.target_namespace) self.elements = NamespaceView(value.elements, self.target_namespace) self.substitution_groups = NamespaceView(value.substitution_groups, self.target_namespace) - self.constraints = NamespaceView(value.constraints, self.target_namespace) + self.identities = NamespaceView(value.identities, self.target_namespace) self.global_maps = (self.notations, self.types, self.attributes, self.attribute_groups, self.groups, self.elements) value.register(self) @@ -404,6 +469,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): def __len__(self): return len(self.elements) + @property + def xpath_proxy(self): 
+ return XMLSchemaProxy(self) + + @property + def xsd_version(self): + """Property that returns the class attribute XSD_VERSION.""" + return self.XSD_VERSION + # XML resource attributes access @property def root(self): @@ -493,13 +567,17 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): @classmethod def builtin_types(cls): - """An accessor for XSD built-in types.""" + """Accessor for XSD built-in types.""" try: - return cls.meta_schema.maps.namespaces[XSD_NAMESPACE][0].types + builtin_types = cls.meta_schema.maps.namespaces[XSD_NAMESPACE][0].types except KeyError: raise XMLSchemaNotBuiltError(cls.meta_schema, "missing XSD namespace in meta-schema") except AttributeError: raise XMLSchemaNotBuiltError(cls.meta_schema, "meta-schema unavailable for %r" % cls) + else: + if not builtin_types: + cls.meta_schema.build() + return builtin_types @property def root_elements(self): @@ -518,7 +596,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for e in xsd_element.iter(): if e is xsd_element or isinstance(e, XsdAnyElement): continue - elif e.ref or e.is_global: + elif e.ref or e.parent is None: if e.name in names: names.discard(e.name) if not names: @@ -527,6 +605,13 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): return [e for e in self.elements.values() if e.name in self._root_elements] + @property + def constraints(self): + """ + Old reference to identity constraints, for backward compatibility. Will be removed in v1.1.0. + """ + return self.identities + @classmethod def create_meta_schema(cls, source=None, base_schemas=None, global_maps=None): """ @@ -534,9 +619,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :param source: an optional argument referencing to or containing the XSD meta-schema \ resource. Required if the schema class doesn't already have a meta-schema. - :param base_schemas: an optional dictionary that contains namespace URIs and schema locations. 
\ - If provided it's used as substitute for class 's BASE_SCHEMAS. Also a sequence of (namespace, \ - location) items can be provided if there are more schema documents for one or more namespaces. + :param base_schemas: an optional dictionary that contains namespace URIs and \ + schema locations. If provided it's used as substitute for class 's BASE_SCHEMAS. \ + Also a sequence of (namespace, location) items can be provided if there are more \ + schema documents for one or more namespaces. :param global_maps: is an optional argument containing an :class:`XsdGlobals` \ instance for the new meta schema. If not provided a new map is created. """ @@ -572,20 +658,68 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): """Creates a new schema instance of the same class of the caller.""" return cls(*args, **kwargs) - def create_any_content_group(self, parent, name=None): - """Creates a model group related to schema instance that accepts any content.""" - group = self.BUILDERS.group_class(SEQUENCE_ELEMENT, self, parent, name) - group.append(XsdAnyElement(ANY_ELEMENT, self, group)) + def create_any_content_group(self, parent, any_element=None): + """ + Creates a model group related to schema instance that accepts any content. + + :param parent: the parent component to set for the any content group. + :param any_element: an optional any element to use for the content group. \ + When provided it's copied, linked to the group and the minOccurs/maxOccurs \ + are set to 0 and 'unbounded'. 
+ """ + group = self.BUILDERS.group_class(SEQUENCE_ELEMENT, self, parent) + + if any_element is not None: + any_element = any_element.copy() + any_element.min_occurs = 0 + any_element.max_occurs = None + any_element.parent = group + group.append(any_element) + else: + group.append(self.BUILDERS.any_element_class(ANY_ELEMENT, self, group)) + return group - def create_any_attribute_group(self, parent, name=None): - """Creates an attribute group related to schema instance that accepts any attribute.""" - attribute_group = self.BUILDERS.attribute_group_class(ATTRIBUTE_GROUP_ELEMENT, self, parent, name) - attribute_group[None] = XsdAnyAttribute(ANY_ATTRIBUTE_ELEMENT, self, attribute_group) + def create_empty_content_group(self, parent, model='sequence'): + if model == 'sequence': + group_elem = etree_element(XSD_SEQUENCE) + elif model == 'choice': + group_elem = etree_element(XSD_CHOICE) + elif model == 'all': + group_elem = etree_element(XSD_ALL) + else: + raise XMLSchemaValueError("'model' argument must be (sequence | choice | all)") + + group_elem.text = '\n ' + return self.BUILDERS.group_class(group_elem, self, parent) + + def create_any_attribute_group(self, parent): + """ + Creates an attribute group related to schema instance that accepts any attribute. + + :param parent: the parent component to set for the any attribute group. + """ + attribute_group = self.BUILDERS.attribute_group_class( + ATTRIBUTE_GROUP_ELEMENT, self, parent + ) + attribute_group[None] = self.BUILDERS.any_attribute_class( + ANY_ATTRIBUTE_ELEMENT, self, attribute_group + ) return attribute_group + def create_empty_attribute_group(self, parent): + """ + Creates an empty attribute group related to schema instance. + + :param parent: the parent component to set for the any attribute group. + """ + return self.BUILDERS.attribute_group_class(ATTRIBUTE_GROUP_ELEMENT, self, parent) + def copy(self): - """Makes a copy of the schema instance. 
The new instance has independent maps of shared XSD components.""" + """ + Makes a copy of the schema instance. The new instance has independent maps + of shared XSD components. + """ schema = object.__new__(self.__class__) schema.__dict__.update(self.__dict__) schema.source = self.source.copy() @@ -614,24 +748,26 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): raise error def build(self): - """Builds the schema XSD global maps.""" + """Builds the schema's XSD global maps.""" self.maps.build() + def clear(self): + """Clears the schema's XSD global maps.""" + self.maps.clear() + @property def built(self): - xsd_global = None - for xsd_global in self.iter_globals(self): - if not isinstance(xsd_global, XsdComponent): - return False - if not xsd_global.built: - return False - - if xsd_global is not None: + if any(not isinstance(g, XsdComponent) or not g.built for g in self.iter_globals()): + return False + for _ in self.iter_globals(): return True + if self.meta_schema is None: + return False + # No XSD globals: check with a lookup of schema child elements. 
prefix = '{%s}' % self.target_namespace if self.target_namespace else '' for child in filter(lambda x: x.tag != XSD_ANNOTATION, self.root): - if child.tag in (XSD_REDEFINE, XSD_OVERRIDE): + if child.tag in {XSD_REDEFINE, XSD_OVERRIDE}: for e in filter(lambda x: x.tag in self.BUILDERS_MAP, child): name = e.get('name') if name is not None: @@ -654,7 +790,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): def validation_attempted(self): if self.built: return 'full' - elif any([comp.validation_attempted == 'partial' for comp in self.iter_globals()]): + elif any(comp.validation_attempted == 'partial' for comp in self.iter_globals()): return 'partial' else: return 'none' @@ -717,17 +853,19 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): def get_element(self, tag, path=None, namespaces=None): if not path: - return self.find(tag) + return self.find(tag, namespaces) elif path[-1] == '*': return self.find(path[:-1] + tag, namespaces) else: return self.find(path, namespaces) - def _include_schemas(self): + def _parse_inclusions(self): """Processes schema document inclusions and redefinitions.""" - for child in iterchildren_xsd_include(self.root): + for child in filter(lambda x: x.tag == XSD_INCLUDE, self.root): try: - self.include_schema(child.attrib['schemaLocation'], self.base_url) + location = child.attrib['schemaLocation'].strip() + logger.info("Include schema from %r", location) + self.include_schema(location, self.base_url) except KeyError: pass except (OSError, IOError) as err: @@ -746,9 +884,11 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): else: self.errors.append(type(err)(msg)) - for child in iterchildren_xsd_redefine(self.root): + for child in filter(lambda x: x.tag == XSD_REDEFINE, self.root): try: - self.include_schema(child.attrib['schemaLocation'], self.base_url) + location = child.attrib['schemaLocation'].strip() + logger.info("Redefine schema %r", location) + schema = 
self.include_schema(location, self.base_url) except KeyError: pass # Attribute missing error already found by validation against meta-schema except (OSError, IOError) as err: @@ -756,7 +896,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): # is equivalent to an include, so no error is generated. Otherwise fails. self.warnings.append("Redefine schema failed: %s." % str(err)) warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3) - if has_xsd_components(child): + if any(e.tag != XSD_ANNOTATION for e in child): self.parse_error(str(err), child) except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: msg = 'cannot redefine schema %r: %s' % (child.attrib['schemaLocation'], err) @@ -766,6 +906,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): raise type(err)(msg) else: self.errors.append(type(err)(msg)) + else: + schema.redefine = self def include_schema(self, location, base_url=None): """ @@ -781,8 +923,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): break else: schema = self.create_schema( - schema_url, self.target_namespace, self.validation, self.maps, self.converter, - self.locations, self.base_url, self.defuse, self.timeout, False + source=schema_url, + namespace=self.target_namespace, + validation=self.validation, + global_maps=self.maps, + converter=self.converter, + base_url=self.base_url, + defuse=self.defuse, + timeout=self.timeout, + build=False, ) if location not in self.includes: @@ -791,14 +940,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): self.includes[schema_url] = schema return schema - def _import_namespaces(self): + def _parse_imports(self): """ - Processes namespace imports. Imports are done on namespace basis not on resource: this - is the standard and also avoids import loops that sometimes are hard to detect. + Parse namespace import elements. 
Imports are done on namespace basis, not on + single resource. A warning is generated for a failure of a namespace import. """ namespace_imports = NamespaceResourcesMap(map( lambda x: (x.get('namespace'), x.get('schemaLocation')), - iterchildren_xsd_import(self.root) + filter(lambda x: x.tag == XSD_IMPORT, self.root) )) for namespace, locations in namespace_imports.items(): @@ -836,35 +985,44 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): if local_hints: locations = local_hints + locations - import_error = None - for url in locations: - try: - self.import_schema(namespace, url, self.base_url) - except (OSError, IOError) as err: - # It's not an error if the location access fails (ref. section 4.2.6.2): - # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#composition-schemaImport - if import_error is None: - import_error = err - except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: - if namespace: - msg = "cannot import namespace %r: %s." % (namespace, err) - else: - msg = "cannot import chameleon schema: %s." % err - if isinstance(err, (XMLSchemaParseError, ParseError)): - self.parse_error(msg) - elif self.validation == 'strict': - raise type(err)(msg) - else: - self.errors.append(type(err)(msg)) - except XMLSchemaValueError as err: - self.parse_error(err) + if namespace in self.FALLBACK_LOCATIONS: + locations.append(self.FALLBACK_LOCATIONS[namespace]) + + self._import_namespace(namespace, locations) + + def _import_namespace(self, namespace, locations): + import_error = None + for url in locations: + try: + logger.debug("Import namespace %r from %r", namespace, url) + self.import_schema(namespace, url, self.base_url) + except (OSError, IOError) as err: + # It's not an error if the location access fails (ref. 
section 4.2.6.2): + # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#composition-schemaImport + logger.debug('%s', err) + if import_error is None: + import_error = err + except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: + if namespace: + msg = "cannot import namespace %r: %s." % (namespace, err) else: - break + msg = "cannot import chameleon schema: %s." % err + if isinstance(err, (XMLSchemaParseError, ParseError)): + self.parse_error(msg) + elif self.validation == 'strict': + raise type(err)(msg) + else: + self.errors.append(type(err)(msg)) + except XMLSchemaValueError as err: + self.parse_error(err) else: - if import_error is not None: - self.warnings.append("Namespace import failed: %s." % str(import_error)) - warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=3) - self.imports[namespace] = None + logger.info("Namespace %r imported from %r", namespace, url) + break + else: + if import_error is not None: + self.warnings.append("Namespace import failed: %s." 
% str(import_error)) + warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=3) + self.imports[namespace] = None def import_schema(self, namespace, location, base_url=None, force=False): """ @@ -893,23 +1051,101 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): return schema schema = self.create_schema( - schema_url, None, self.validation, self.maps, self.converter, - self.locations, self.base_url, self.defuse, self.timeout, False + source=schema_url, + validation=self.validation, + global_maps=self.maps, + converter=self.converter, + base_url=self.base_url, + defuse=self.defuse, + timeout=self.timeout, + build=False, ) if schema.target_namespace != namespace: raise XMLSchemaValueError('imported schema %r has an unmatched namespace %r' % (location, namespace)) self.imports[namespace] = schema return schema - def resolve_qname(self, qname): + def version_check(self, elem): + """ + Checks if the element is compatible with the version of the validator and XSD + types/facets availability. + + :param elem: an Element of the schema. + :return: `True` if the schema element is compatible with the validator, \ + `False` otherwise. 
+ """ + if VC_MIN_VERSION in elem.attrib: + vc_min_version = elem.attrib[VC_MIN_VERSION] + if not XSD_VERSION_PATTERN.match(vc_min_version): + self.parse_error("invalid attribute vc:minVersion value", elem) + elif vc_min_version > '1.1': + return False + + if VC_MAX_VERSION in elem.attrib: + vc_max_version = elem.attrib[VC_MAX_VERSION] + if not XSD_VERSION_PATTERN.match(vc_max_version): + self.parse_error("invalid attribute vc:maxVersion value", elem) + elif vc_max_version <= '1.1': + return False + + if VC_TYPE_AVAILABLE in elem.attrib: + for qname in elem.attrib[VC_TYPE_AVAILABLE].split(): + try: + if self.resolve_qname(qname) not in self.maps.types: + return False + except XMLSchemaNamespaceError: + return False + except (KeyError, ValueError) as err: + self.parse_error(str(err), elem) + + if VC_TYPE_UNAVAILABLE in elem.attrib: + for qname in elem.attrib[VC_TYPE_UNAVAILABLE].split(): + try: + if self.resolve_qname(qname) not in self.maps.types: + break + except XMLSchemaNamespaceError: + break + except (KeyError, ValueError) as err: + self.parse_error(err, elem) + else: + return False + + if VC_FACET_AVAILABLE in elem.attrib: + for qname in elem.attrib[VC_FACET_AVAILABLE].split(): + try: + if self.resolve_qname(qname) not in XSD_11_FACETS: + return False + except XMLSchemaNamespaceError: + pass + except (KeyError, ValueError) as err: + self.parse_error(str(err), elem) + + if VC_FACET_UNAVAILABLE in elem.attrib: + for qname in elem.attrib[VC_FACET_UNAVAILABLE].split(): + try: + if self.resolve_qname(qname) not in XSD_11_FACETS: + break + except XMLSchemaNamespaceError: + break + except (KeyError, ValueError) as err: + self.parse_error(err, elem) + else: + return False + + return True + + def resolve_qname(self, qname, namespace_imported=True): """ QName resolution for a schema instance. :param qname: a string in xs:QName format. 
+ :param namespace_imported: if this argument is `True` raises an \ + `XMLSchemaNamespaceError` if the namespace of the QName is not the \ + *targetNamespace* and the namespace is not imported by the schema. :returns: an expanded QName in the format "{*namespace-URI*}*local-name*". - :raises: `XMLSchemaValueError` for an invalid xs:QName or if the namespace prefix is not \ - declared in the schema instance or if the namespace is not the *targetNamespace* and \ - the namespace is not imported by the schema. + :raises: `XMLSchemaValueError` for an invalid xs:QName is found, \ + `XMLSchemaKeyError` if the namespace prefix is not declared in the \ + schema instance. """ qname = qname.strip() if not qname or ' ' in qname or '\t' in qname or '\n' in qname: @@ -929,15 +1165,17 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): try: namespace = self.namespaces[prefix] except KeyError: - raise XMLSchemaValueError("prefix %r not found in namespace map" % prefix) + raise XMLSchemaKeyError("prefix %r not found in namespace map" % prefix) else: namespace, local_name = self.namespaces.get('', ''), qname if not namespace: return local_name - elif self.meta_schema is not None and namespace != self.target_namespace and \ - namespace not in {XSD_NAMESPACE, XSI_NAMESPACE} and namespace not in self.imports: - raise XMLSchemaValueError( + elif namespace_imported and self.meta_schema is not None and \ + namespace != self.target_namespace and \ + namespace not in {XSD_NAMESPACE, XSI_NAMESPACE} and \ + namespace not in self.imports: + raise XMLSchemaNamespaceError( "the QName {!r} is mapped to the namespace {!r}, but this namespace has " "not an xs:import statement in the schema.".format(qname, namespace) ) @@ -976,10 +1214,12 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :param namespaces: is an optional mapping from namespace prefix to URI. """ if not self.built: - raise XMLSchemaNotBuiltError(self, "schema %r is not built." 
% self) - elif not isinstance(source, XMLResource): - source = XMLResource(source=source, defuse=self.defuse, timeout=self.timeout, lazy=False) + if self.meta_schema is not None: + raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) + self.build() + if not isinstance(source, XMLResource): + source = XMLResource(source=source, defuse=self.defuse, timeout=self.timeout, lazy=False) if not schema_path and path: schema_path = path if path.startswith('/') else '/%s/%s' % (source.root.tag, path) @@ -987,6 +1227,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): namespaces.update(source.get_namespaces()) id_map = Counter() + inherited = {} if source.is_lazy() and path is None: # TODO: Document validation in lazy mode. @@ -997,7 +1238,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): yield self.validation_error('lax', "%r is not an element of the schema" % source.root, source.root) for result in xsd_element.iter_decode(source.root, source=source, namespaces=namespaces, - use_defaults=use_defaults, id_map=id_map, _no_deep=None): + use_defaults=use_defaults, id_map=id_map, no_depth=True, + inherited=inherited, drop_results=True): if isinstance(result, XMLSchemaValidationError): yield result else: @@ -1008,17 +1250,25 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): schema_path = '/%s/*' % source.root.tag for elem in source.iterfind(path, namespaces): - xsd_element = self.get_element(elem.tag, schema_path, namespaces) + xsd_element = self.get_element(elem.tag, schema_path, self.namespaces) if xsd_element is None: yield self.validation_error('lax', "%r is not an element of the schema" % elem, elem) for result in xsd_element.iter_decode(elem, source=source, namespaces=namespaces, - use_defaults=use_defaults, id_map=id_map): + use_defaults=use_defaults, id_map=id_map, + inherited=inherited, drop_results=True): if isinstance(result, XMLSchemaValidationError): yield result else: del result + # 
Check unresolved IDREF values + for k, v in id_map.items(): + if isinstance(v, XMLSchemaValidationError): + yield v + elif v == 0: + yield self.validation_error('lax', "IDREF %r not found in XML document" % k, source.root) + def iter_decode(self, source, path=None, schema_path=None, validation='lax', process_namespaces=True, namespaces=None, use_defaults=True, decimal_type=None, datetime_types=False, converter=None, filler=None, fill_missing=False, **kwargs): @@ -1054,8 +1304,11 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): or decoding errors. """ if not self.built: - raise XMLSchemaNotBuiltError(self, "schema %r is not built." % self) - elif validation not in XSD_VALIDATION_MODES: + if self.meta_schema is not None: + raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) + self.build() + + if validation not in XSD_VALIDATION_MODES: raise XMLSchemaValueError("validation argument can be 'strict', 'lax' or 'skip': %r" % validation) elif not isinstance(source, XMLResource): source = XMLResource(source=source, defuse=self.defuse, timeout=self.timeout, lazy=False) @@ -1071,8 +1324,12 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): converter = self.get_converter(converter, namespaces, **kwargs) id_map = Counter() + inherited = {} + if decimal_type is not None: kwargs['decimal_type'] = decimal_type + if filler is not None: + kwargs['filler'] = filler for elem in source.iterfind(path, namespaces): xsd_element = self.get_element(elem.tag, schema_path, namespaces) @@ -1082,9 +1339,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): for obj in xsd_element.iter_decode( elem, validation, converter=converter, source=source, namespaces=namespaces, use_defaults=use_defaults, datetime_types=datetime_types, - filler=filler, fill_missing=fill_missing, id_map=id_map, **kwargs): + fill_missing=fill_missing, id_map=id_map, inherited=inherited, **kwargs): yield obj + for k, v in id_map.items(): + if 
isinstance(v, XMLSchemaValidationError): + yield v + elif v == 0: + yield self.validation_error('lax', "IDREF %r not found in XML document" % k, source.root) + def decode(self, source, path=None, schema_path=None, validation='strict', *args, **kwargs): """ Decodes XML data. Takes the same arguments of the method :func:`XMLSchema.iter_decode`. @@ -1108,7 +1371,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): to_dict = decode - def iter_encode(self, obj, path=None, validation='lax', namespaces=None, converter=None, **kwargs): + def iter_encode(self, obj, path=None, validation='lax', namespaces=None, converter=None, + unordered=False, **kwargs): """ Creates an iterator for encoding a data structure to an ElementTree's Element. @@ -1119,12 +1383,17 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :param validation: the XSD validation mode. Can be 'strict', 'lax' or 'skip'. :param namespaces: is an optional mapping from namespace prefix to URI. :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the encoding. + :param unordered: a flag for explicitly activating unordered encoding mode for content model \ + data. This mode uses content models for a reordered-by-model iteration of the child elements. :param kwargs: Keyword arguments containing options for converter and encoding. :return: yields an Element instance/s or validation/encoding errors. """ if not self.built: - raise XMLSchemaNotBuiltError(self, "schema %r is not built." 
% self) - elif validation not in XSD_VALIDATION_MODES: + if self.meta_schema is not None: + raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) + self.build() + + if validation not in XSD_VALIDATION_MODES: raise XMLSchemaValueError("validation argument can be 'strict', 'lax' or 'skip': %r" % validation) elif not self.elements: yield XMLSchemaValueError("encoding needs at least one XSD element declaration!") @@ -1149,7 +1418,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): msg = "unable to select an element for decoding data, provide a valid 'path' argument." yield XMLSchemaEncodeError(self, obj, self.elements, reason=msg) else: - for result in xsd_element.iter_encode(obj, validation, converter=converter, **kwargs): + for result in xsd_element.iter_encode(obj, validation, converter=converter, + unordered=unordered, **kwargs): yield result def encode(self, obj, path=None, validation='strict', *args, **kwargs): @@ -1210,14 +1480,19 @@ class XMLSchema10(XMLSchemaBase): 'any_element_class': XsdAnyElement, 'restriction_class': XsdAtomicRestriction, 'union_class': XsdUnion, + 'key_class': XsdKey, + 'keyref_class': XsdKeyref, + 'unique_class': XsdUnique, 'simple_type_factory': xsd_simple_type_factory } meta_schema = os.path.join(SCHEMAS_DIR, 'XSD_1.0/XMLSchema.xsd') BASE_SCHEMAS = { XML_NAMESPACE: XML_SCHEMA_FILE, - # HFP_NAMESPACE: HFP_SCHEMA_FILE, XSI_NAMESPACE: XSI_SCHEMA_FILE, + } + FALLBACK_LOCATIONS = { XLINK_NAMESPACE: XLINK_SCHEMA_FILE, + XHTML_NAMESPACE: XHTML_SCHEMA_FILE, } @@ -1268,22 +1543,31 @@ class XMLSchema11(XMLSchemaBase): 'any_element_class': Xsd11AnyElement, 'restriction_class': Xsd11AtomicRestriction, 'union_class': Xsd11Union, - 'simple_type_factory': xsd_simple_type_factory + 'key_class': Xsd11Key, + 'keyref_class': Xsd11Keyref, + 'unique_class': Xsd11Unique, + 'simple_type_factory': xsd_simple_type_factory, } meta_schema = os.path.join(SCHEMAS_DIR, 'XSD_1.1/XMLSchema.xsd') BASE_SCHEMAS = { - XSD_NAMESPACE: 
os.path.join(SCHEMAS_DIR, 'XSD_1.1/list_builtins.xsd'), + XSD_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XSD_1.1/xsd11-extra.xsd'), XML_NAMESPACE: XML_SCHEMA_FILE, - # HFP_NAMESPACE: HFP_SCHEMA_FILE, XSI_NAMESPACE: XSI_SCHEMA_FILE, + VC_NAMESPACE: VC_SCHEMA_FILE, + } + FALLBACK_LOCATIONS = { XLINK_NAMESPACE: XLINK_SCHEMA_FILE, + XHTML_NAMESPACE: XHTML_SCHEMA_FILE, } - def _include_schemas(self): - super(XMLSchema11, self)._include_schemas() - for child in iterchildren_xsd_override(self.root): + def _parse_inclusions(self): + super(XMLSchema11, self)._parse_inclusions() + + for child in filter(lambda x: x.tag == XSD_OVERRIDE, self.root): try: - self.include_schema(child.attrib['schemaLocation'], self.base_url) + location = child.attrib['schemaLocation'].strip() + logger.info("Override schema %r", location) + schema = self.include_schema(location, self.base_url) except KeyError: pass # Attribute missing error already found by validation against meta-schema except (OSError, IOError) as err: @@ -1291,8 +1575,10 @@ class XMLSchema11(XMLSchemaBase): # is equivalent to an include, so no error is generated. Otherwise fails. self.warnings.append("Override schema failed: %s." 
% str(err)) warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3) - if has_xsd_components(child): + if any(e.tag != XSD_ANNOTATION for e in child): self.parse_error(str(err), child) + else: + schema.override = self XMLSchema = XMLSchema10 diff --git a/xmlschema/validators/schemas/XMLSchema-versioning_minimal.xsd b/xmlschema/validators/schemas/XMLSchema-versioning_minimal.xsd new file mode 100644 index 0000000..e492c69 --- /dev/null +++ b/xmlschema/validators/schemas/XMLSchema-versioning_minimal.xsd @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xmlschema/validators/schemas/XSD_1.1/list_builtins.xsd b/xmlschema/validators/schemas/XSD_1.1/list_builtins.xsd deleted file mode 100644 index e2c4ccb..0000000 --- a/xmlschema/validators/schemas/XSD_1.1/list_builtins.xsd +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd b/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd new file mode 100644 index 0000000..ba49a10 --- /dev/null +++ b/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index d1fa79f..62bed94 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -17,30 +17,29 @@ from decimal import DecimalException from ..compat import string_base_type, unicode_type from ..etree import etree_element from ..exceptions import XMLSchemaTypeError, XMLSchemaValueError -from ..qnames import ( - XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, - XSD_ANY_ATTRIBUTE, XSD_PATTERN, XSD_MIN_INCLUSIVE, 
XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, - XSD_MAX_EXCLUSIVE, XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, - XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, XSD_ID, - XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS -) -from ..helpers import get_qname, local_name, get_xsd_derivation_attribute +from ..qnames import XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, \ + XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE, XSD_PATTERN, \ + XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, \ + XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, \ + XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, \ + XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, \ + XSD_ERROR, XSD_ASSERT, get_qname, local_name +from ..helpers import get_xsd_derivation_attribute -from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, XMLSchemaDecodeError, XMLSchemaParseError +from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, \ + XMLSchemaDecodeError, XMLSchemaParseError from .xsdbase import XsdAnnotation, XsdType, ValidationMixin -from .facets import XsdFacet, XsdWhiteSpaceFacet, XSD_10_FACETS_BUILDERS, XSD_11_FACETS_BUILDERS, XSD_10_FACETS, \ - XSD_11_FACETS, XSD_10_LIST_FACETS, XSD_11_LIST_FACETS, XSD_10_UNION_FACETS, XSD_11_UNION_FACETS, MULTIPLE_FACETS +from .facets import XsdFacet, XsdWhiteSpaceFacet, XSD_10_FACETS_BUILDERS, \ + XSD_11_FACETS_BUILDERS, XSD_10_FACETS, XSD_11_FACETS, XSD_10_LIST_FACETS, \ + XSD_11_LIST_FACETS, XSD_10_UNION_FACETS, XSD_11_UNION_FACETS, MULTIPLE_FACETS def xsd_simple_type_factory(elem, schema, parent): - try: - name = get_qname(schema.target_namespace, elem.attrib['name']) - except KeyError: - name = None - else: - if name == XSD_ANY_SIMPLE_TYPE: - return - + """ + Factory function for XSD simple types. 
Parses the xs:simpleType element and its + child component, that can be a restriction, a list or an union. Annotations are + linked to simple type instance, omitting the inner annotation if both are given. + """ annotation = None try: child = elem[0] @@ -48,31 +47,44 @@ def xsd_simple_type_factory(elem, schema, parent): return schema.maps.types[XSD_ANY_SIMPLE_TYPE] else: if child.tag == XSD_ANNOTATION: + annotation = XsdAnnotation(elem[0], schema, child) try: child = elem[1] - annotation = XsdAnnotation(elem[0], schema, child) except IndexError: + schema.parse_error("(restriction | list | union) expected", elem) return schema.maps.types[XSD_ANY_SIMPLE_TYPE] if child.tag == XSD_RESTRICTION: - result = schema.BUILDERS.restriction_class(child, schema, parent, name=name) + xsd_type = schema.BUILDERS.restriction_class(child, schema, parent) elif child.tag == XSD_LIST: - result = XsdList(child, schema, parent, name=name) + xsd_type = XsdList(child, schema, parent) elif child.tag == XSD_UNION: - result = schema.BUILDERS.union_class(child, schema, parent, name=name) + xsd_type = schema.BUILDERS.union_class(child, schema, parent) else: - result = schema.maps.types[XSD_ANY_SIMPLE_TYPE] + schema.parse_error("(restriction | list | union) expected", elem) + return schema.maps.types[XSD_ANY_SIMPLE_TYPE] if annotation is not None: - result.annotation = annotation + xsd_type.annotation = annotation + + try: + xsd_type.name = get_qname(schema.target_namespace, elem.attrib['name']) + except KeyError: + if parent is None: + schema.parse_error("missing attribute 'name' in a global simpleType", elem) + xsd_type.name = 'nameless_%s' % str(id(xsd_type)) + else: + if parent is not None: + schema.parse_error("attribute 'name' not allowed for a local simpleType", elem) + xsd_type.name = None if 'final' in elem.attrib: try: - result._final = get_xsd_derivation_attribute(elem, 'final') + xsd_type._final = get_xsd_derivation_attribute(elem, 'final') except ValueError as err: - 
result.parse_error(err, elem) + xsd_type.parse_error(err, elem) - return result + return xsd_type class XsdSimpleType(XsdType, ValidationMixin): @@ -80,16 +92,16 @@ class XsdSimpleType(XsdType, ValidationMixin): Base class for simpleTypes definitions. Generally used only for instances of xs:anySimpleType. - - Content: (annotation?, (restriction | list | union)) - + .. + Content: (annotation?, (restriction | list | union)) + """ _special_types = {XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE} - _admitted_tags = {XSD_SIMPLE_TYPE} + _ADMITTED_TAGS = {XSD_SIMPLE_TYPE} min_length = None max_length = None @@ -221,11 +233,22 @@ class XsdSimpleType(XsdType, ValidationMixin): # Checks fraction digits if XSD_TOTAL_DIGITS in facets: - if XSD_FRACTION_DIGITS in facets and facets[XSD_TOTAL_DIGITS].value < facets[XSD_FRACTION_DIGITS].value: - self.parse_error("fractionDigits facet value cannot be lesser than the value of totalDigits") + if XSD_FRACTION_DIGITS in facets and \ + facets[XSD_TOTAL_DIGITS].value < facets[XSD_FRACTION_DIGITS].value: + self.parse_error("fractionDigits facet value cannot be lesser than the " + "value of totalDigits facet") total_digits = base_type.get_facet(XSD_TOTAL_DIGITS) if total_digits is not None and total_digits.value < facets[XSD_TOTAL_DIGITS].value: - self.parse_error("totalDigits facet value cannot be greater than those on the base type") + self.parse_error("totalDigits facet value cannot be greater than " + "the value of the same facet in the base type") + + # Checks XSD 1.1 facets + if XSD_EXPLICIT_TIMEZONE in facets: + explicit_tz_facet = base_type.get_facet(XSD_EXPLICIT_TIMEZONE) + if explicit_tz_facet and explicit_tz_facet.value in ('prohibited', 'required') \ + and facets[XSD_EXPLICIT_TIMEZONE].value != explicit_tz_facet.value: + self.parse_error("the explicitTimezone facet value cannot be changed if the base " + "type has the same facet with value %r" % explicit_tz_facet.value) self.min_length = min_length self.max_length = max_length @@ -256,7 
+279,7 @@ class XsdSimpleType(XsdType, ValidationMixin): @property def admitted_facets(self): - return XSD_10_FACETS if self.schema.XSD_VERSION == '1.0' else XSD_11_FACETS + return XSD_10_FACETS if self.xsd_version == '1.0' else XSD_11_FACETS @property def built(self): @@ -270,6 +293,10 @@ class XsdSimpleType(XsdType, ValidationMixin): def is_complex(): return False + @staticmethod + def is_list(): + return False + def is_empty(self): return self.max_length == 0 @@ -327,11 +354,14 @@ class XsdSimpleType(XsdType, ValidationMixin): else: return text + def text_decode(self, text): + return self.decode(text, validation='skip') + def iter_decode(self, obj, validation='lax', **kwargs): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) - if validation != 'skip': + if validation != 'skip' and obj is not None: if self.patterns is not None: for error in self.patterns(obj): yield error @@ -347,7 +377,7 @@ class XsdSimpleType(XsdType, ValidationMixin): elif validation != 'skip': yield self.encode_error(validation, obj, unicode_type) - if validation != 'skip': + if validation != 'skip' and obj is not None: if self.patterns is not None: for error in self.patterns(obj): yield error @@ -370,11 +400,15 @@ class XsdAtomic(XsdSimpleType): a base_type attribute that refers to primitive or derived atomic built-in type or another derived simpleType. 
""" + to_python = str _special_types = {XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE} - _admitted_tags = {XSD_RESTRICTION, XSD_SIMPLE_TYPE} + _ADMITTED_TAGS = {XSD_RESTRICTION, XSD_SIMPLE_TYPE} def __init__(self, elem, schema, parent, name=None, facets=None, base_type=None): - self.base_type = base_type + if base_type is None: + self.primitive_type = self + else: + self.base_type = base_type super(XsdAtomic, self).__init__(elem, schema, parent, name, facets) def __repr__(self): @@ -384,52 +418,27 @@ class XsdAtomic(XsdSimpleType): return '%s(name=%r)' % (self.__class__.__name__, self.prefixed_name) def __setattr__(self, name, value): - if name == 'base_type' and value is not None and not isinstance(value, XsdType): - raise XMLSchemaValueError("%r attribute must be an XsdType instance or None: %r" % (name, value)) super(XsdAtomic, self).__setattr__(name, value) - if name in ('base_type', 'white_space'): - if getattr(self, 'white_space', None) is None: + if name == 'base_type': + assert isinstance(value, XsdType) + if not hasattr(self, 'white_space'): try: - white_space = self.base_type.white_space + self.white_space = self.base_type.white_space except AttributeError: - return + pass + try: + if value.is_simple(): + self.primitive_type = self.base_type.primitive_type else: - if white_space is not None: - self.white_space = white_space - - @property - def built(self): - if self.base_type is None: - return True - else: - return self.base_type.is_global or self.base_type.built - - @property - def validation_attempted(self): - if self.built: - return 'full' - else: - return self.base_type.validation_attempted + self.primitive_type = self.base_type.content_type.primitive_type + except AttributeError: + self.primitive_type = value @property def admitted_facets(self): - primitive_type = self.primitive_type - if primitive_type is None or primitive_type.is_complex(): - return XSD_10_FACETS if self.schema.XSD_VERSION == '1.0' else XSD_11_FACETS - return 
primitive_type.admitted_facets - - @property - def primitive_type(self): - if self.base_type is None: - return self - try: - if self.base_type.is_simple(): - return self.base_type.primitive_type - else: - return self.base_type.content_type.primitive_type - except AttributeError: - # The base_type is XsdList or XsdUnion. - return self.base_type + if self.primitive_type.is_complex(): + return XSD_10_FACETS if self.xsd_version == '1.0' else XSD_11_FACETS + return self.primitive_type.admitted_facets def get_facet(self, tag): try: @@ -444,10 +453,6 @@ class XsdAtomic(XsdSimpleType): def is_atomic(): return True - @staticmethod - def is_list(): - return False - class XsdAtomicBuiltin(XsdAtomic): """ @@ -460,8 +465,8 @@ class XsdAtomicBuiltin(XsdAtomic): - to_python(value): Decoding from XML - from_python(value): Encoding to XML """ - def __init__(self, elem, schema, name, python_type, base_type=None, admitted_facets=None, facets=None, - to_python=None, from_python=None): + def __init__(self, elem, schema, name, python_type, base_type=None, admitted_facets=None, + facets=None, to_python=None, from_python=None): """ :param name: the XSD type's qualified name. :param python_type: the correspondent Python's type. 
If a tuple or list of types \ @@ -479,7 +484,7 @@ class XsdAtomicBuiltin(XsdAtomic): if not callable(python_type): raise XMLSchemaTypeError("%r object is not callable" % python_type.__class__) - if base_type is None and not admitted_facets: + if base_type is None and not admitted_facets and name != XSD_ERROR: raise XMLSchemaValueError("argument 'admitted_facets' must be a not empty set of a primitive type") self._admitted_facets = admitted_facets @@ -498,6 +503,9 @@ class XsdAtomicBuiltin(XsdAtomic): def admitted_facets(self): return self._admitted_facets or self.primitive_type.admitted_facets + def is_datetime(self): + return self.to_python.__name__ == 'fromstring' + def iter_decode(self, obj, validation='lax', **kwargs): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) @@ -511,10 +519,23 @@ class XsdAtomicBuiltin(XsdAtomic): except KeyError: pass else: - id_map[obj] += 1 - if id_map[obj] > 1: + try: + id_map[obj] += 1 + except TypeError: + id_map[obj] = 1 + + if id_map[obj] > 1 and '_skip_id' not in kwargs: yield self.validation_error(validation, "Duplicated xsd:ID value {!r}".format(obj)) + elif self.name == XSD_IDREF: + try: + id_map = kwargs['id_map'] + except KeyError: + pass + else: + if obj not in id_map: + id_map[obj] = kwargs.get('node', 0) + if validation == 'skip': try: yield self.to_python(obj) @@ -532,6 +553,11 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.decode_error(validation, obj, self.to_python, reason=str(err)) yield None return + except TypeError: + # xs:error type (eg. 
an XSD 1.1 type alternative used to catch invalid values) + yield self.validation_error(validation, "Invalid value {!r}".format(obj)) + yield None + return for validator in self.validators: for error in validator(result): @@ -570,6 +596,10 @@ class XsdAtomicBuiltin(XsdAtomic): yield self.encode_error(validation, obj, self.from_python) yield None return + except TypeError: + yield self.validation_error(validation, "Invalid value {!r}".format(obj)) + yield None + return for validator in self.validators: for error in validator(obj): @@ -592,14 +622,14 @@ class XsdList(XsdSimpleType): Class for 'list' definitions. A list definition has an item_type attribute that refers to an atomic or union simpleType definition. - - Content: (annotation?, simpleType?) - + .. + Content: (annotation?, simpleType?) + """ - _admitted_tags = {XSD_LIST} + _ADMITTED_TAGS = {XSD_LIST} _white_space_elem = etree_element(XSD_WHITE_SPACE, attrib={'value': 'collapse', 'fixed': 'true'}) def __init__(self, elem, schema, parent, name=None): @@ -631,7 +661,7 @@ class XsdList(XsdSimpleType): super(XsdList, self)._parse() elem = self.elem - child = self._parse_component(elem, required=False) + child = self._parse_child_component(elem) if child is not None: # Case of a local simpleType declaration inside the list tag try: @@ -647,22 +677,29 @@ class XsdList(XsdSimpleType): # List tag with itemType attribute that refers to a global type try: item_qname = self.schema.resolve_qname(elem.attrib['itemType']) - except KeyError: - self.parse_error("missing list type declaration", elem) - base_type = self.maps.types[XSD_ANY_ATOMIC_TYPE] - except ValueError as err: - self.parse_error(err, elem) + except (KeyError, ValueError, RuntimeError) as err: + if 'itemType' not in elem.attrib: + self.parse_error("missing list type declaration") + else: + self.parse_error(err) base_type = self.maps.types[XSD_ANY_ATOMIC_TYPE] else: try: base_type = self.maps.lookup_type(item_qname) - except LookupError: + except KeyError: 
self.parse_error("unknown itemType %r" % elem.attrib['itemType'], elem) base_type = self.maps.types[XSD_ANY_ATOMIC_TYPE] + else: + if isinstance(base_type, tuple): + self.parse_error("circular definition found for type {!r}".format(item_qname)) + base_type = self.maps.types[XSD_ANY_ATOMIC_TYPE] if base_type.final == '#all' or 'list' in base_type.final: self.parse_error("'final' value of the itemType %r forbids derivation by list" % base_type) + if base_type is self.any_atomic_type: + self.parse_error("Cannot use xs:anyAtomicType as base type of a user-defined type") + try: self.base_type = base_type except XMLSchemaValueError as err: @@ -671,23 +708,12 @@ class XsdList(XsdSimpleType): @property def admitted_facets(self): - return XSD_10_LIST_FACETS if self.schema.XSD_VERSION == '1.0' else XSD_11_LIST_FACETS + return XSD_10_LIST_FACETS if self.xsd_version == '1.0' else XSD_11_LIST_FACETS @property def item_type(self): return self.base_type - @property - def built(self): - return self.base_type.is_global or self.base_type.built - - @property - def validation_attempted(self): - if self.built: - return 'full' - else: - return self.base_type.validation_attempted - @staticmethod def is_atomic(): return False @@ -711,7 +737,7 @@ class XsdList(XsdSimpleType): def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self - if not self.base_type.is_global: + if self.base_type.parent is not None: for obj in self.base_type.iter_components(xsd_classes): yield obj @@ -719,10 +745,6 @@ class XsdList(XsdSimpleType): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) - if validation != 'skip' and self.patterns: - for error in self.patterns(obj): - yield error - items = [] for chunk in obj.split(): for result in self.base_type.iter_decode(chunk, validation, **kwargs): @@ -731,22 +753,12 @@ class XsdList(XsdSimpleType): else: items.append(result) - if validation != 'skip': - for validator in self.validators: - 
for error in validator(items): - yield error - yield items def iter_encode(self, obj, validation='lax', **kwargs): if not hasattr(obj, '__iter__') or isinstance(obj, (str, unicode_type, bytes)): obj = [obj] - if validation != 'skip': - for validator in self.validators: - for error in validator(obj): - yield error - encoded_items = [] for item in obj: for result in self.base_type.iter_encode(item, validation, **kwargs): @@ -763,15 +775,15 @@ class XsdUnion(XsdSimpleType): Class for 'union' definitions. A union definition has a member_types attribute that refers to a 'simpleType' definition. - - Content: (annotation?, simpleType*) - + .. + Content: (annotation?, simpleType*) + """ - _admitted_types = XsdSimpleType - _admitted_tags = {XSD_UNION} + _ADMITTED_TYPES = XsdSimpleType + _ADMITTED_TAGS = {XSD_UNION} member_types = None @@ -804,7 +816,7 @@ class XsdUnion(XsdSimpleType): elem = self.elem member_types = [] - for child in self._iterparse_components(elem): + for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): mt = xsd_simple_type_factory(child, self.schema, self) if isinstance(mt, XMLSchemaParseError): self.parse_error(mt) @@ -815,13 +827,13 @@ class XsdUnion(XsdSimpleType): for name in elem.attrib['memberTypes'].split(): try: type_qname = self.schema.resolve_qname(name) - except ValueError as err: + except (KeyError, ValueError, RuntimeError) as err: self.parse_error(err) continue try: mt = self.maps.lookup_type(type_qname) - except LookupError: + except KeyError: self.parse_error("unknown member type %r" % type_qname) mt = self.maps.types[XSD_ANY_ATOMIC_TYPE] except XMLSchemaParseError as err: @@ -831,36 +843,25 @@ class XsdUnion(XsdSimpleType): if isinstance(mt, tuple): self.parse_error("circular definition found on xs:union type {!r}".format(self.name)) continue - elif not isinstance(mt, self._admitted_types): - self.parse_error("a {!r} required, not {!r}".format(self._admitted_types, mt)) + elif not isinstance(mt, self._ADMITTED_TYPES): + 
self.parse_error("a {!r} required, not {!r}".format(self._ADMITTED_TYPES, mt)) continue elif mt.final == '#all' or 'union' in mt.final: self.parse_error("'final' value of the memberTypes %r forbids derivation by union" % member_types) member_types.append(mt) - if member_types: - self.member_types = member_types - else: + if not member_types: self.parse_error("missing xs:union type declarations", elem) self.member_types = [self.maps.types[XSD_ANY_ATOMIC_TYPE]] + elif any(mt is self.any_atomic_type for mt in member_types): + self.parse_error("Cannot use xs:anyAtomicType as base type of a user-defined type") + else: + self.member_types = member_types @property def admitted_facets(self): - return XSD_10_UNION_FACETS if self.schema.XSD_VERSION == '1.0' else XSD_11_UNION_FACETS - - @property - def built(self): - return all([mt.is_global or mt.built for mt in self.member_types]) - - @property - def validation_attempted(self): - if self.built: - return 'full' - elif any([mt.validation_attempted == 'partial' for mt in self.member_types]): - return 'partial' - else: - return 'none' + return XSD_10_UNION_FACETS if self.xsd_version == '1.0' else XSD_11_UNION_FACETS def is_atomic(self): return all(mt.is_atomic() for mt in self.member_types) @@ -868,37 +869,33 @@ class XsdUnion(XsdSimpleType): def is_list(self): return all(mt.is_list() for mt in self.member_types) + def is_dynamic_consistent(self, other): + return other.is_derived(self) or hasattr(other, 'member_types') and \ + any(mt1.is_derived(mt2) for mt1 in other.member_types for mt2 in self.member_types) + def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self - for mt in self.member_types: - if not mt.is_global: - for obj in mt.iter_components(xsd_classes): - yield obj + for mt in filter(lambda x: x.parent is not None, self.member_types): + for obj in mt.iter_components(xsd_classes): + yield obj - def iter_decode(self, obj, validation='lax', **kwargs): - if 
isinstance(obj, (string_base_type, bytes)): - obj = self.normalize(obj) - - if validation != 'skip' and self.patterns: - for error in self.patterns(obj): - yield error - - # Try the text as a whole + def iter_decode(self, obj, validation='lax', patterns=None, **kwargs): + # Try decoding the whole text for member_type in self.member_types: for result in member_type.iter_decode(obj, validation='lax', **kwargs): if not isinstance(result, XMLSchemaValidationError): - if validation != 'skip': - for validator in self.validators: - for error in validator(result): - yield error + if validation != 'skip' and patterns: + obj = member_type.normalize(obj) + for error in patterns(obj): + yield error yield result return break if validation != 'skip' and ' ' not in obj.strip(): - reason = "no type suitable for decoding %r." % obj + reason = "invalid value %r." % obj yield self.decode_error(validation, obj, self.member_types, reason) items = [] @@ -923,24 +920,12 @@ class XsdUnion(XsdSimpleType): reason = "no type suitable for decoding the values %r." 
% not_decodable yield self.decode_error(validation, obj, self.member_types, reason) - for validator in self.validators: - for error in validator(items): - yield error - yield items if len(items) > 1 else items[0] if items else None def iter_encode(self, obj, validation='lax', **kwargs): for member_type in self.member_types: for result in member_type.iter_encode(obj, validation='lax', **kwargs): if result is not None and not isinstance(result, XMLSchemaValidationError): - if validation != 'skip': - for validator in self.validators: - for error in validator(obj): - yield error - if self.patterns is not None: - for error in self.patterns(result): - yield error - yield result return elif validation == 'strict': @@ -953,14 +938,6 @@ class XsdUnion(XsdSimpleType): for item in obj: for result in member_type.iter_encode(item, validation='lax', **kwargs): if result is not None and not isinstance(result, XMLSchemaValidationError): - if validation != 'skip': - for validator in self.validators: - for error in validator(result): - yield error - if self.patterns is not None: - for error in self.patterns(result): - yield error - results.append(result) break elif validation == 'strict': @@ -979,25 +956,25 @@ class XsdUnion(XsdSimpleType): class Xsd11Union(XsdUnion): - - _admitted_types = XsdAtomic, XsdList, XsdUnion + _ADMITTED_TYPES = XsdAtomic, XsdList, XsdUnion class XsdAtomicRestriction(XsdAtomic): """ Class for XSD 1.0 atomic simpleType and complexType's simpleContent restrictions. - - Content: (annotation?, (simpleType?, (minExclusive | minInclusive | maxExclusive | - maxInclusive | totalDigits | fractionDigits | length | minLength | maxLength | - enumeration | whiteSpace | pattern)*)) - + .. 
+ Content: (annotation?, (simpleType?, (minExclusive | minInclusive | maxExclusive | + maxInclusive | totalDigits | fractionDigits | length | minLength | maxLength | + enumeration | whiteSpace | pattern)*)) + """ FACETS_BUILDERS = XSD_10_FACETS_BUILDERS derivation = 'restriction' + _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE} def __setattr__(self, name, value): if name == 'elem' and value is not None: @@ -1012,7 +989,7 @@ class XsdAtomicRestriction(XsdAtomic): if elem.get('name') == XSD_ANY_ATOMIC_TYPE: return # skip special type xs:anyAtomicType elif elem.tag == XSD_SIMPLE_TYPE and elem.get('name') is not None: - elem = self._parse_component(elem) # Global simpleType with internal restriction + elem = self._parse_child_component(elem) # Global simpleType with internal restriction if self.name is not None and self.parent is not None: self.parse_error("'name' attribute in a local simpleType definition", elem) @@ -1025,7 +1002,7 @@ class XsdAtomicRestriction(XsdAtomic): if 'base' in elem.attrib: try: base_qname = self.schema.resolve_qname(elem.attrib['base']) - except ValueError as err: + except (KeyError, ValueError, RuntimeError) as err: self.parse_error(err, elem) base_type = self.maps.type[XSD_ANY_ATOMIC_TYPE] else: @@ -1041,7 +1018,7 @@ class XsdAtomicRestriction(XsdAtomic): try: base_type = self.maps.lookup_type(base_qname) - except LookupError: + except KeyError: self.parse_error("unknown type %r." 
% elem.attrib['base']) base_type = self.maps.types[XSD_ANY_ATOMIC_TYPE] except XMLSchemaParseError as err: @@ -1056,7 +1033,10 @@ class XsdAtomicRestriction(XsdAtomic): self.parse_error("wrong base type {!r}, an atomic type required") elif base_type.is_complex(): if base_type.mixed and base_type.is_emptiable(): - if self._parse_component(elem, strict=False).tag != XSD_SIMPLE_TYPE: + child = self._parse_child_component(elem, strict=False) + if child is None: + self.parse_error("an xs:simpleType definition expected") + elif child.tag != XSD_SIMPLE_TYPE: # See: "http://www.w3.org/TR/xmlschema-2/#element-restriction" self.parse_error( "when a complexType with simpleContent restricts a complexType " @@ -1066,8 +1046,8 @@ class XsdAtomicRestriction(XsdAtomic): elif self.parent is None or self.parent.is_simple(): self.parse_error("simpleType restriction of %r is not allowed" % base_type, elem) - for child in self._iterparse_components(elem): - if child.tag in {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE}: + for child in filter(lambda x: x.tag != XSD_ANNOTATION, elem): + if child.tag in self._CONTENT_TAIL_TAGS: has_attributes = True # only if it's a complexType restriction elif has_attributes: self.parse_error("unexpected tag after attribute declarations", child) @@ -1122,6 +1102,8 @@ class XsdAtomicRestriction(XsdAtomic): self.parse_error("missing base type in restriction:", self) elif base_type.final == '#all' or 'restriction' in base_type.final: self.parse_error("'final' value of the baseType %r forbids derivation by restriction" % base_type) + if base_type is self.any_atomic_type: + self.parse_error("Cannot use xs:anyAtomicType as base type of a user-defined type") self.base_type = base_type self.facets = facets @@ -1129,7 +1111,7 @@ class XsdAtomicRestriction(XsdAtomic): def iter_components(self, xsd_classes=None): if xsd_classes is None or isinstance(self, xsd_classes): yield self - if not self.base_type.is_global: + if self.base_type.parent is not None: 
for obj in self.base_type.iter_components(xsd_classes): yield obj @@ -1137,10 +1119,6 @@ class XsdAtomicRestriction(XsdAtomic): if isinstance(obj, (string_base_type, bytes)): obj = self.normalize(obj) - if validation != 'skip' and self.patterns: - for error in self.patterns(obj): - yield error - if self.base_type.is_simple(): base_type = self.base_type elif self.base_type.has_simple_content(): @@ -1152,13 +1130,20 @@ class XsdAtomicRestriction(XsdAtomic): raise XMLSchemaValueError("wrong base type %r: a simpleType or a complexType with " "simple or mixed content required." % self.base_type) + if validation != 'skip' and self.patterns: + if not isinstance(self.primitive_type, XsdUnion): + for error in self.patterns(obj): + yield error + elif 'patterns' not in kwargs: + kwargs['patterns'] = self.patterns + for result in base_type.iter_decode(obj, validation, **kwargs): if isinstance(result, XMLSchemaValidationError): yield result if isinstance(result, XMLSchemaDecodeError): yield unicode_type(obj) if validation == 'skip' else None else: - if validation != 'skip': + if validation != 'skip' and result is not None: for validator in self.validators: for error in validator(result): yield error @@ -1170,35 +1155,21 @@ class XsdAtomicRestriction(XsdAtomic): if self.is_list(): if not hasattr(obj, '__iter__') or isinstance(obj, (str, unicode_type, bytes)): obj = [] if obj is None or obj == '' else [obj] - - if validation != 'skip': - for validator in self.validators: - for error in validator(obj): - yield error - - for result in self.base_type.iter_encode(obj, validation): - if isinstance(result, XMLSchemaValidationError): - yield result - if isinstance(result, XMLSchemaEncodeError): - yield unicode_type(obj) if validation == 'skip' else None - return - else: - yield result - return - - if isinstance(obj, (string_base_type, bytes)): - obj = self.normalize(obj) - - if self.base_type.is_simple(): base_type = self.base_type - elif self.base_type.has_simple_content(): - base_type 
= self.base_type.content_type - elif self.base_type.mixed: - yield unicode_type(obj) - return else: - raise XMLSchemaValueError("wrong base type %r: a simpleType or a complexType with " - "simple or mixed content required." % self.base_type) + if isinstance(obj, (string_base_type, bytes)): + obj = self.normalize(obj) + + if self.base_type.is_simple(): + base_type = self.base_type + elif self.base_type.has_simple_content(): + base_type = self.base_type.content_type + elif self.base_type.mixed: + yield unicode_type(obj) + return + else: + raise XMLSchemaValueError("wrong base type %r: a simpleType or a complexType with " + "simple or mixed content required." % self.base_type) for result in base_type.iter_encode(obj, validation): if isinstance(result, XMLSchemaValidationError): @@ -1207,7 +1178,11 @@ class XsdAtomicRestriction(XsdAtomic): yield unicode_type(obj) if validation == 'skip' else None return else: - if validation != 'skip': + if validation != 'skip' and self.validators and obj is not None: + if isinstance(obj, (string_base_type, bytes)): + if self.primitive_type.is_datetime(): + obj = self.primitive_type.to_python(obj) + for validator in self.validators: for error in validator(obj): yield error @@ -1223,14 +1198,15 @@ class Xsd11AtomicRestriction(XsdAtomicRestriction): """ Class for XSD 1.1 atomic simpleType and complexType's simpleContent restrictions. - - Content: (annotation?, (simpleType?, (minExclusive | minInclusive | maxExclusive | - maxInclusive | totalDigits | fractionDigits | length | minLength | maxLength | - enumeration | whiteSpace | pattern | assertion | explicitTimezone | - {any with namespace: ##other})*)) - + .. 
+ Content: (annotation?, (simpleType?, (minExclusive | minInclusive | maxExclusive | + maxInclusive | totalDigits | fractionDigits | length | minLength | maxLength | + enumeration | whiteSpace | pattern | assertion | explicitTimezone | + {any with namespace: ##other})*)) + """ FACETS_BUILDERS = XSD_11_FACETS_BUILDERS + _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE, XSD_ASSERT} diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 16a9eff..ade601b 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -14,20 +14,21 @@ This module contains classes for XML Schema wildcards. from __future__ import unicode_literals from ..exceptions import XMLSchemaValueError -from ..qnames import XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_OPEN_CONTENT, XSD_DEFAULT_OPEN_CONTENT -from ..helpers import get_namespace from ..namespaces import XSI_NAMESPACE -from ..xpath import ElementPathMixin +from ..qnames import XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_OPEN_CONTENT, \ + XSD_DEFAULT_OPEN_CONTENT, get_namespace +from ..xpath import XMLSchemaProxy, ElementPathMixin from .exceptions import XMLSchemaNotBuiltError from .xsdbase import ValidationMixin, XsdComponent, ParticleMixin class XsdWildcard(XsdComponent, ValidationMixin): - names = {} - namespace = '##any' + names = () + namespace = ('##any',) not_namespace = () not_qname = () + process_contents = 'strict' def __init__(self, elem, schema, parent): if parent is None: @@ -35,9 +36,14 @@ class XsdWildcard(XsdComponent, ValidationMixin): super(XsdWildcard, self).__init__(elem, schema, parent) def __repr__(self): - return '%s(namespace=%r, process_contents=%r)' % ( - self.__class__.__name__, self.namespace, self.process_contents - ) + if self.not_namespace: + return '%s(not_namespace=%r, process_contents=%r)' % ( + self.__class__.__name__, self.not_namespace, self.process_contents + ) + else: + return '%s(namespace=%r, process_contents=%r)' % ( + 
self.__class__.__name__, self.namespace, self.process_contents + ) def _parse(self): super(XsdWildcard, self)._parse() @@ -46,16 +52,82 @@ class XsdWildcard(XsdComponent, ValidationMixin): namespace = self.elem.get('namespace', '##any').strip() if namespace == '##any': pass - elif namespace in {'##other', '##local', '##targetNamespace'}: - self.namespace = namespace - elif not all(not s.startswith('##') or s in {'##local', '##targetNamespace'} for s in namespace.split()): - self.parse_error("wrong value %r for 'namespace' attribute." % namespace) + elif not namespace: + self.namespace = [] # an empty value means no namespace allowed! + elif namespace == '##other': + self.namespace = [namespace] + elif namespace == '##local': + self.namespace = [''] + elif namespace == '##targetNamespace': + self.namespace = [self.target_namespace] else: - self.namespace = namespace + self.namespace = [] + for ns in namespace.split(): + if ns == '##local': + self.namespace.append('') + elif ns == '##targetNamespace': + self.namespace.append(self.target_namespace) + elif ns.startswith('##'): + self.parse_error("wrong value %r in 'namespace' attribute" % ns) + else: + self.namespace.append(ns) - self.process_contents = self.elem.get('processContents', 'strict') - if self.process_contents not in {'lax', 'skip', 'strict'}: - self.parse_error("wrong value %r for 'processContents' attribute." 
% self.process_contents) + process_contents = self.elem.get('processContents', 'strict') + if process_contents == 'strict': + pass + elif process_contents not in ('lax', 'skip'): + self.parse_error("wrong value %r for 'processContents' attribute" % self.process_contents) + else: + self.process_contents = process_contents + + def _parse_not_constraints(self): + if 'notNamespace' not in self.elem.attrib: + pass + elif 'namespace' in self.elem.attrib: + self.parse_error("'namespace' and 'notNamespace' attributes are mutually exclusive") + else: + self.namespace = [] + self.not_namespace = [] + for ns in self.elem.attrib['notNamespace'].strip().split(): + if ns == '##local': + self.not_namespace.append('') + elif ns == '##targetNamespace': + self.not_namespace.append(self.target_namespace) + elif ns.startswith('##'): + self.parse_error("wrong value %r in 'notNamespace' attribute" % ns) + else: + self.not_namespace.append(ns) + + # Parse notQName attribute + if 'notQName' not in self.elem.attrib: + return + + not_qname = self.elem.attrib['notQName'].strip().split() + + if isinstance(self, XsdAnyAttribute) and \ + not all(not s.startswith('##') or s == '##defined' for s in not_qname) or \ + not all(not s.startswith('##') or s in {'##defined', '##definedSibling'} for s in not_qname): + self.parse_error("wrong value for 'notQName' attribute") + return + + try: + names = [x if x.startswith('##') else self.schema.resolve_qname(x, False) + for x in not_qname] + except KeyError as err: + self.parse_error("unmapped QName in 'notQName' attribute: %s" % str(err)) + return + except ValueError as err: + self.parse_error("wrong QName format in 'notQName' attribute: %s" % str(err)) + return + + if self.not_namespace: + if any(not x.startswith('##') for x in names) and \ + all(get_namespace(x) in self.not_namespace for x in names if not x.startswith('##')): + self.parse_error("the namespace of each QName in notQName is allowed by notNamespace") + elif any(not 
self.is_namespace_allowed(get_namespace(x)) for x in names if not x.startswith('##')): + self.parse_error("names in notQName must be in namespaces that are allowed") + + self.not_qname = names def _load_namespace(self, namespace): if namespace in self.schema.maps.namespaces: @@ -80,18 +152,7 @@ class XsdWildcard(XsdComponent, ValidationMixin): def built(self): return True - def iter_namespaces(self): - if self.namespace in ('##any', '##other'): - return - for ns in self.namespace.split(): - if ns == '##local': - yield '' - elif ns == '##targetNamespace': - yield self.target_namespace - else: - yield ns - - def is_matching(self, name, default_namespace=None): + def is_matching(self, name, default_namespace=None, **kwargs): if name is None: return False elif not name or name[0] == '{': @@ -103,28 +164,33 @@ class XsdWildcard(XsdComponent, ValidationMixin): def is_namespace_allowed(self, namespace): if self.not_namespace: - if '##local' in self.not_namespace and namespace == '': - return False - elif '##targetNamespace' in self.not_namespace and namespace == self.target_namespace: - return False - else: - return namespace not in self.not_namespace - - elif self.namespace == '##any' or namespace == XSI_NAMESPACE: + return namespace not in self.not_namespace + elif '##any' in self.namespace or namespace == XSI_NAMESPACE: return True - elif self.namespace == '##other': - if namespace: - return namespace != self.target_namespace - else: - return False + elif '##other' in self.namespace: + return namespace and namespace != self.target_namespace else: - any_namespaces = self.namespace.split() - if '##local' in any_namespaces and namespace == '': - return True - elif '##targetNamespace' in any_namespaces and namespace == self.target_namespace: - return True - else: - return namespace in any_namespaces + return namespace in self.namespace + + def deny_namespaces(self, namespaces): + if self.not_namespace: + return all(x in self.not_namespace for x in namespaces) + elif 
'##any' in self.namespace: + return False + elif '##other' in self.namespace: + return all(x == self.target_namespace for x in namespaces) + else: + return all(x not in self.namespace for x in namespaces) + + def deny_qnames(self, names): + if self.not_namespace: + return all(x in self.not_qname or get_namespace(x) in self.not_namespace for x in names) + elif '##any' in self.namespace: + return all(x in self.not_qname for x in names) + elif '##other' in self.namespace: + return all(x in self.not_qname or get_namespace(x) == self.target_namespace for x in names) + else: + return all(x in self.not_qname or get_namespace(x) not in self.namespace for x in names) def is_restriction(self, other, check_occurs=True): if check_occurs and isinstance(self, ParticleMixin) and not self.has_occurs_restriction(other): @@ -135,69 +201,234 @@ class XsdWildcard(XsdComponent, ValidationMixin): return False elif other.process_contents == 'lax' and self.process_contents == 'skip': return False - elif self.namespace == other.namespace: - return True - elif other.namespace == '##any': - return True - elif self.namespace == '##any': + + if not self.not_qname and not other.not_qname: + pass + elif '##defined' in other.not_qname and '##defined' not in self.not_qname: + return False + elif '##definedSibling' in other.not_qname and '##definedSibling' not in self.not_qname: + return False + elif other.not_qname: + if not self.deny_qnames(x for x in other.not_qname if not x.startswith('##')): + return False + elif any(not other.is_namespace_allowed(get_namespace(x)) + for x in self.not_qname if not x.startswith('##')): return False - other_namespaces = other.namespace.split() - for ns in self.namespace.split(): - if ns in other_namespaces: - continue - elif ns == self.target_namespace: - if '##targetNamespace' in other_namespaces: - continue - elif not ns.startswith('##') and '##other' in other_namespaces: - continue - return False - return True + if self.not_namespace: + if 
other.not_namespace: + return all(ns in self.not_namespace for ns in other.not_namespace) + elif '##any' in other.namespace: + return True + elif '##other' in other.namespace: + return '' in self.not_namespace and other.target_namespace in self.not_namespace + else: + return False + elif other.not_namespace: + if '##any' in self.namespace: + return False + elif '##other' in self.namespace: + return set(other.not_namespace).issubset({'', other.target_namespace}) + else: + return all(ns not in other.not_namespace for ns in self.namespace) - def iter_decode(self, source, validation='lax', *args, **kwargs): + if self.namespace == other.namespace: + return True + elif '##any' in other.namespace: + return True + elif '##any' in self.namespace or '##other' in self.namespace: + return False + elif '##other' in other.namespace: + return other.target_namespace not in self.namespace and '' not in self.namespace + else: + return all(ns in other.namespace for ns in self.namespace) + + def union(self, other): + """ + Update an XSD wildcard with the union of itself and another XSD wildcard. 
+ """ + if not self.not_qname: + self.not_qname = other.not_qname[:] + else: + self.not_qname = [ + x for x in self.not_qname + if x in other.not_qname or not other.is_namespace_allowed(get_namespace(x)) + ] + + if self.not_namespace: + if other.not_namespace: + self.not_namespace = [ns for ns in self.not_namespace if ns in other.not_namespace] + elif '##any' in other.namespace: + self.not_namespace = [] + self.namespace = ['##any'] + return + elif '##other' in other.namespace: + not_namespace = ('', other.target_namespace) + self.not_namespace = [ns for ns in self.not_namespace if ns in not_namespace] + else: + self.not_namespace = [ns for ns in self.not_namespace if ns not in other.namespace] + + if not self.not_namespace: + self.namespace = ['##any'] + return + + elif other.not_namespace: + if '##any' in self.namespace: + return + elif '##other' in self.namespace: + not_namespace = ('', self.target_namespace) + self.not_namespace = [ns for ns in other.not_namespace if ns in not_namespace] + else: + self.not_namespace = [ns for ns in other.not_namespace if ns not in self.namespace] + + self.namespace = ['##any'] if not self.not_namespace else [] + return + + if '##any' in self.namespace or self.namespace == other.namespace: + return + elif '##any' in other.namespace: + self.namespace = ['##any'] + return + elif '##other' in other.namespace: + w1, w2 = other, self + elif '##other' in self.namespace: + w1, w2 = self, other + else: + self.namespace.extend(ns for ns in other.namespace if ns not in self.namespace) + return + + if w1.target_namespace in w2.namespace and '' in w2.namespace: + self.namespace = ['##any'] + elif '' not in w2.namespace and w1.target_namespace == w2.target_namespace: + self.namespace = ['##other'] + elif self.xsd_version == '1.0': + msg = "not expressible wildcard namespace union: {!r} V {!r}:" + raise XMLSchemaValueError(msg.format(other.namespace, self.namespace)) + else: + self.namespace = [] + self.not_namespace = ['', 
w1.target_namespace] if w1.target_namespace else [''] + + def intersection(self, other): + """ + Update an XSD wildcard with the intersection of itself and another XSD wildcard. + """ + if self.not_qname: + self.not_qname.extend(x for x in other.not_qname if x not in self.not_qname) + else: + self.not_qname = [x for x in other.not_qname] + + if self.not_namespace: + if other.not_namespace: + self.not_namespace.extend(ns for ns in other.not_namespace if ns not in self.not_namespace) + elif '##any' in other.namespace: + pass + elif '##other' not in other.namespace: + self.namespace = [ns for ns in other.namespace if ns not in self.not_namespace] + self.not_namespace = [] + else: + if other.target_namespace not in self.not_namespace: + self.not_namespace.append(other.target_namespace) + if '' not in self.not_namespace: + self.not_namespace.append('') + return + + elif other.not_namespace: + if '##any' in self.namespace: + self.not_namespace = [ns for ns in other.not_namespace] + self.namespace = [] + elif '##other' not in self.namespace: + self.namespace = [ns for ns in self.namespace if ns not in other.not_namespace] + else: + self.not_namespace = [ns for ns in other.not_namespace] + if self.target_namespace not in self.not_namespace: + self.not_namespace.append(self.target_namespace) + if '' not in self.not_namespace: + self.not_namespace.append('') + self.namespace = [] + return + + if self.namespace == other.namespace: + return + elif '##any' in other.namespace: + return + elif '##any' in self.namespace: + self.namespace = other.namespace[:] + elif '##other' in self.namespace: + self.namespace = [ns for ns in other.namespace if ns not in ('', self.target_namespace)] + elif '##other' not in other.namespace: + self.namespace = [ns for ns in self.namespace if ns in other.namespace] + else: + if other.target_namespace in self.namespace: + self.namespace.remove(other.target_namespace) + if '' in self.namespace: + self.namespace.remove('') + + def iter_decode(self, 
source, validation='lax', **kwargs): raise NotImplementedError - def iter_encode(self, obj, validation='lax', *args, **kwargs): + def iter_encode(self, obj, validation='lax', **kwargs): raise NotImplementedError class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): """ - Class for XSD 1.0 'any' wildcards. + Class for XSD 1.0 *any* wildcards. - - Content: (annotation?) - + .. + Content: (annotation?) + """ - _admitted_tags = {XSD_ANY} + _ADMITTED_TAGS = {XSD_ANY} + precedences = () def __repr__(self): - return '%s(namespace=%r, process_contents=%r, occurs=%r)' % ( - self.__class__.__name__, self.namespace, self.process_contents, self.occurs - ) + if self.namespace: + return '%s(namespace=%r, process_contents=%r, occurs=%r)' % ( + self.__class__.__name__, self.namespace, self.process_contents, self.occurs + ) + else: + return '%s(not_namespace=%r, process_contents=%r, occurs=%r)' % ( + self.__class__.__name__, self.not_namespace, self.process_contents, self.occurs + ) + + @property + def xpath_proxy(self): + return XMLSchemaProxy(self.schema, self) def _parse(self): super(XsdAnyElement, self)._parse() self._parse_particle(self.elem) - def is_emptiable(self): - return self.min_occurs == 0 or self.process_contents != 'strict' + def match(self, name, default_namespace=None, resolve=False, **kwargs): + """ + Returns the element wildcard if name is matching the name provided + as argument, `None` otherwise. - def match(self, name, default_namespace=None): - if self.is_matching(name, default_namespace): - try: - if name[0] != '{' and default_namespace: - return self.maps.lookup_element('{%s}%s' % (default_namespace, name)) - else: - return self.maps.lookup_element(name) - except LookupError: - pass + :param name: a local or fully-qualified name. + :param default_namespace: used when it's not `None` and not empty for \ + completing local name arguments. 
+ :param resolve: when `True` it doesn't return the wildcard but try to \ + resolve and return the element matching the name. + :param kwargs: additional options used by XSD 1.1 xs:any wildcards. + """ + if not self.is_matching(name, default_namespace, **kwargs): + return + elif not resolve: + return self + + try: + if name[0] != '{' and default_namespace: + return self.maps.lookup_element('{%s}%s' % (default_namespace, name)) + else: + return self.maps.lookup_element(name) + except LookupError: + pass def __iter__(self): return iter(()) @@ -213,16 +444,18 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): return iter(()) def iter_decode(self, elem, validation='lax', **kwargs): - if self.process_contents == 'skip': - return + if self.is_matching(elem.tag): + if self.process_contents == 'skip': + return - namespace = get_namespace(elem.tag) - if self.is_namespace_allowed(namespace): - self._load_namespace(namespace) + self._load_namespace(get_namespace(elem.tag)) try: xsd_element = self.maps.lookup_element(elem.tag) except LookupError: - if self.process_contents == 'strict' and validation != 'skip': + if kwargs.get('drop_results'): + # Validation-only mode: use anyType for decode a complex element. + yield self.any_type.decode(elem) if len(elem) > 0 else elem.text + elif self.process_contents == 'strict' and validation != 'skip': reason = "element %r not found." % elem.tag yield self.validation_error(validation, reason, elem, **kwargs) else: @@ -238,6 +471,7 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): name, value = obj namespace = get_namespace(name) + if self.is_namespace_allowed(namespace): self._load_namespace(namespace) try: @@ -253,84 +487,93 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): reason = "element %r not allowed here." 
% name yield self.validation_error(validation, reason, value, **kwargs) - def overlap(self, other): + def is_overlap(self, other): if not isinstance(other, XsdAnyElement): - return other.overlap(self) + return other.is_overlap(self) + elif self.not_namespace: + if other.not_namespace: + return True + elif '##any' in other.namespace: + return True + elif '##other' in other.namespace: + return True + else: + return any(ns not in self.not_namespace for ns in other.namespace) + elif other.not_namespace: + if '##any' in self.namespace: + return True + elif '##other' in self.namespace: + return True + else: + return any(ns not in other.not_namespace for ns in self.namespace) elif self.namespace == other.namespace: return True - elif self.namespace == '##any' or other.namespace == '##any': + elif '##any' in self.namespace or '##any' in other.namespace: return True - elif self.namespace == '##other': - return any(not ns.startswith('##') and ns != self.target_namespace for ns in other.namespace.split()) - elif other.namespace == '##other': - return any(not ns.startswith('##') and ns != other.target_namespace for ns in self.namespace.split()) + elif '##other' in self.namespace: + return any(ns and ns != self.target_namespace for ns in other.namespace) + elif '##other' in other.namespace: + return any(ns and ns != other.target_namespace for ns in self.namespace) + else: + return any(ns in self.namespace for ns in other.namespace) - any_namespaces = self.namespace.split() - return any(ns in any_namespaces for ns in other.namespace.split()) + def is_consistent(self, other): + return True class XsdAnyAttribute(XsdWildcard): """ - Class for XSD 1.0 'anyAttribute' wildcards. + Class for XSD 1.0 *anyAttribute* wildcards. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ - _admitted_tags = {XSD_ANY_ATTRIBUTE} + _ADMITTED_TAGS = {XSD_ANY_ATTRIBUTE} - def extend_namespace(self, other): - if self.namespace == '##any' or self.namespace == other.namespace: - return - elif other.namespace == '##any': - self.namespace = other.namespace - return - elif other.namespace == '##other': - w1, w2 = other, self - elif self.namespace == '##other': - w1, w2 = self, other - elif self.target_namespace == other.target_namespace: - self.namespace = ' '.join(set(other.namespace.split() + self.namespace.split())) - return - else: - self.namespace = ' '.join(set(list(other.iter_namespaces()) + self.namespace.split())) - return + def match(self, name, default_namespace=None, resolve=False, **kwargs): + """ + Returns the attribute wildcard if name is matching the name provided + as argument, `None` otherwise. - namespaces = set(w2.iter_namespaces()) - if w1.target_namespace in namespaces and '' in namespaces: - self.namespace = '##any' - elif '' not in namespaces and w1.target_namespace == w2.target_namespace: - self.namespace = '##other' - else: - msg = "not expressible wildcard namespace union: {!r} V {!r}:" - raise XMLSchemaValueError(msg.format(other.namespace, self.namespace)) + :param name: a local or fully-qualified name. + :param default_namespace: used when it's not `None` and not empty for \ + completing local name arguments. + :param resolve: when `True` it doesn't return the wildcard but try to \ + resolve and return the attribute matching the name. + :param kwargs: additional options that can be used by certain components. 
+ """ + if not self.is_matching(name, default_namespace, **kwargs): + return + elif not resolve: + return self - def match(self, name, default_namespace=None): - if self.is_matching(name, default_namespace): - try: - if name[0] != '{' and default_namespace: - return self.maps.lookup_attribute('{%s}%s' % (default_namespace, name)) - else: - return self.maps.lookup_attribute(name) - except LookupError: - pass + try: + if name[0] != '{' and default_namespace: + return self.maps.lookup_attribute('{%s}%s' % (default_namespace, name)) + else: + return self.maps.lookup_attribute(name) + except LookupError: + pass def iter_decode(self, attribute, validation='lax', **kwargs): - if self.process_contents == 'skip': - return - name, value = attribute - namespace = get_namespace(name) - if self.is_namespace_allowed(namespace): - self._load_namespace(namespace) + if self.is_matching(name): + if self.process_contents == 'skip': + return + + self._load_namespace(get_namespace(name)) try: xsd_attribute = self.maps.lookup_attribute(name) except LookupError: - if self.process_contents == 'strict' and validation != 'skip': + if kwargs.get('drop_results'): + # Validation-only mode: returns the value if a decoder is not found. + yield value + elif self.process_contents == 'strict' and validation != 'skip': reason = "attribute %r not found." % name yield self.validation_error(validation, reason, attribute, **kwargs) else: @@ -364,105 +607,130 @@ class XsdAnyAttribute(XsdWildcard): class Xsd11AnyElement(XsdAnyElement): """ - Class for XSD 1.1 'any' declarations. + Class for XSD 1.1 *any* declarations. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ def _parse(self): super(Xsd11AnyElement, self)._parse() + self._parse_not_constraints() - # Parse notNamespace attribute - try: - not_namespace = self.elem.attrib['notNamespace'].strip().split() - except KeyError: - pass - else: - if 'namespace' in self.elem.attrib: - self.parse_error("'namespace' and 'notNamespace' attributes are mutually exclusive.") - elif not all(not s.startswith('##') or s in {'##local', '##targetNamespace'} for s in not_namespace): - self.parse_error("wrong value %r for 'notNamespace' attribute." % self.elem.attrib['notNamespace']) - else: - self.not_namespace = not_namespace + def is_matching(self, name, default_namespace=None, group=None, occurs=None): + """ + Returns `True` if the component name is matching the name provided as argument, + `False` otherwise. For XSD elements the matching is extended to substitutes. - # Parse notQName attribute - try: - not_qname = self.elem.attrib['notQName'].strip().split() - except KeyError: - pass + :param name: a local or fully-qualified name. + :param default_namespace: used if it's not None and not empty for completing \ + the name argument in case it's a local name. + :param group: used only by XSD 1.1 any element wildcards to verify siblings in \ + case of ##definedSibling value in notQName attribute. + :param occurs: a Counter instance for verify model occurrences counting. + """ + if name is None: + return False + elif not name or name[0] == '{': + namespace = get_namespace(name) + elif default_namespace is None: + namespace = '' else: - if not all(not s.startswith('##') or s in {'##defined', '##definedSibling'} for s in not_qname): - self.parse_error("wrong value %r for 'notQName' attribute." 
% self.elem.attrib['notQName']) - else: - self.not_qname = not_qname + name = '{%s}%s' % (default_namespace, name) + namespace = default_namespace + + if group in self.precedences: + if occurs is None: + if any(e.is_matching(name) for e in self.precedences[group]): + return False + elif any(e.is_matching(name) and not e.is_over(occurs[e]) for e in self.precedences[group]): + return False + + if '##defined' in self.not_qname and name in self.maps.elements: + return False + if group and '##definedSibling' in self.not_qname: + if any(e.is_matching(name) for e in group.iter_elements() + if not isinstance(e, XsdAnyElement)): + return False + return name not in self.not_qname and self.is_namespace_allowed(namespace) + + def is_consistent(self, other): + if isinstance(other, XsdAnyElement) or self.process_contents == 'skip': + return True + xsd_element = self.match(other.name, other.default_namespace, resolve=True) + return xsd_element is None or other.is_consistent(xsd_element, strict=False) + + def add_precedence(self, other, group): + if not self.precedences: + self.precedences = {} + try: + self.precedences[group].append(other) + except KeyError: + self.precedences[group] = [other] class Xsd11AnyAttribute(XsdAnyAttribute): """ - Class for XSD 1.1 'anyAttribute' declarations. + Class for XSD 1.1 *anyAttribute* declarations. - - Content: (annotation?) - + .. + Content: (annotation?) 
+ """ + inheritable = False # Added for reduce checkings on XSD 1.1 attributes + def _parse(self): super(Xsd11AnyAttribute, self)._parse() + self._parse_not_constraints() - # Parse notNamespace attribute - try: - not_namespace = self.elem.attrib['notNamespace'].strip().split() - except KeyError: - pass + def is_matching(self, name, default_namespace=None, **kwargs): + if name is None: + return False + elif not name or name[0] == '{': + namespace = get_namespace(name) + elif default_namespace is None: + namespace = '' else: - if 'namespace' in self.elem.attrib: - self.parse_error("'namespace' and 'notNamespace' attributes are mutually exclusive.") - elif not all(not s.startswith('##') or s in {'##local', '##targetNamespace'} for s in not_namespace): - self.parse_error("wrong value %r for 'notNamespace' attribute." % self.elem.attrib['notNamespace']) - else: - self.not_namespace = not_namespace + name = '{%s}%s' % (default_namespace, name) + namespace = default_namespace - # Parse notQName attribute - try: - not_qname = self.elem.attrib['notQName'].strip().split() - except KeyError: - pass - else: - if not all(not s.startswith('##') or s == '##defined' for s in not_qname): - self.parse_error("wrong value %r for 'notQName' attribute." % self.elem.attrib['notQName']) - else: - self.not_qname = not_qname + if '##defined' in self.not_qname and name in self.maps.attributes: + return False + return name not in self.not_qname and self.is_namespace_allowed(namespace) class XsdOpenContent(XsdComponent): """ - Class for XSD 1.1 'openContent' model definitions. + Class for XSD 1.1 *openContent* model definitions. - - Content: (annotation?), (any?) - + .. + Content: (annotation?), (any?) 
+ """ - _admitted_tags = {XSD_OPEN_CONTENT} + _ADMITTED_TAGS = {XSD_OPEN_CONTENT} mode = 'interleave' any_element = None + def __init__(self, elem, schema, parent): + super(XsdOpenContent, self).__init__(elem, schema, parent) + def __repr__(self): return '%s(mode=%r)' % (self.__class__.__name__, self.mode) @@ -473,31 +741,51 @@ class XsdOpenContent(XsdComponent): except KeyError: pass else: - if self.mode not in ('none', 'interleave', 'suffix'): + if self.mode not in {'none', 'interleave', 'suffix'}: self.parse_error("wrong value %r for 'mode' attribute." % self.mode) - child = self._parse_component(self.elem) - if child is not None and child.tag == XSD_ANY: - self.any_element = Xsd11AnyElement(child, self.schema, self) + child = self._parse_child_component(self.elem) + if self.mode == 'none': + if child is not None and child.tag == XSD_ANY: + self.parse_error("an openContent with mode='none' must not has an child declaration") + elif child is None or child.tag != XSD_ANY: + self.parse_error("an child declaration is required") + else: + any_element = Xsd11AnyElement(child, self.schema, self) + any_element.min_occurs = 0 + any_element.max_occurs = None + self.any_element = any_element @property def built(self): return True + def is_restriction(self, other): + if self.mode == 'none' or other is None or other.mode == 'none': + return True + elif self.mode == 'interleave' and other.mode == 'suffix': + return False + else: + return self.any_element.is_restriction(other.any_element) + class XsdDefaultOpenContent(XsdOpenContent): """ - Class for XSD 1.1 'defaultOpenContent' model definitions. + Class for XSD 1.1 *defaultOpenContent* model definitions. - - Content: (annotation?, any) - + .. 
+ Content: (annotation?, any) + """ - _admitted_tags = {XSD_DEFAULT_OPEN_CONTENT} + _ADMITTED_TAGS = {XSD_DEFAULT_OPEN_CONTENT} + applies_to_empty = False + + def __init__(self, elem, schema): + super(XsdOpenContent, self).__init__(elem, schema) def _parse(self): super(XsdDefaultOpenContent, self)._parse() @@ -505,5 +793,8 @@ class XsdDefaultOpenContent(XsdOpenContent): self.parse_error("defaultOpenContent must be a child of the schema") if self.mode == 'none': self.parse_error("the attribute 'mode' of a defaultOpenContent cannot be 'none'") - if self._parse_component(self.elem) is None: + if self._parse_child_component(self.elem) is None: self.parse_error("a defaultOpenContent declaration cannot be empty") + + if self._parse_boolean_attribute('appliesToEmpty'): + self.applies_to_empty = True diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index 0f5f9fe..13393ee 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -16,10 +16,14 @@ import re from ..compat import PY3, string_base_type, unicode_type from ..exceptions import XMLSchemaValueError, XMLSchemaTypeError -from ..qnames import XSD_ANNOTATION, XSD_APPINFO, XSD_DOCUMENTATION, XML_LANG, XSD_ANY_TYPE, XSD_ID -from ..helpers import get_qname, local_name, qname_to_prefixed, iter_xsd_components, get_xsd_component -from ..etree import etree_tostring, is_etree_element -from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, XMLSchemaDecodeError, XMLSchemaEncodeError +from ..qnames import XSD_ANNOTATION, XSD_APPINFO, XSD_DOCUMENTATION, XML_LANG, \ + XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ID, XSD_OVERRIDE, \ + get_qname, local_name, qname_to_prefixed +from ..etree import etree_tostring +from ..helpers import is_etree_element +from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, \ + XMLSchemaDecodeError, XMLSchemaEncodeError + XSD_VALIDATION_MODES = {'strict', 'lax', 'skip'} """ @@ -43,6 +47,8 @@ class 
XsdValidator(object): :ivar errors: XSD validator building errors. :vartype errors: list """ + xsd_version = None + def __init__(self, validation='strict'): if validation not in XSD_VALIDATION_MODES: raise XMLSchemaValueError("validation argument can be 'strict', 'lax' or 'skip': %r" % validation) @@ -62,7 +68,9 @@ class XsdValidator(object): @property def built(self): """ - Property that is ``True`` if schema validator has been fully parsed and built, ``False`` otherwise. + Property that is ``True`` if XSD validator has been fully parsed and built, + ``False`` otherwise. For schemas the property is checked on all global + components. For XSD components check only the building of local subcomponents. """ raise NotImplementedError @@ -84,7 +92,7 @@ class XsdValidator(object): | https://www.w3.org/TR/xmlschema-1/#e-validity | https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#e-validity """ - if self.errors or any([comp.errors for comp in self.iter_components()]): + if self.errors or any(comp.errors for comp in self.iter_components()): return 'invalid' elif self.built: return 'valid' @@ -119,7 +127,7 @@ class XsdValidator(object): __copy__ = copy - def parse_error(self, error, elem=None): + def parse_error(self, error, elem=None, validation=None): """ Helper method for registering parse errors. Does nothing if validation mode is 'skip'. Il validation mode is 'lax' collects the error, otherwise raise the error. @@ -127,8 +135,11 @@ class XsdValidator(object): :param error: can be a parse error or an error message. :param elem: the Element instance related to the error, for default uses the 'elem' \ attribute of the validator, if it's present. + :param validation: overrides the default validation mode of the validator. 
""" - if self.validation == 'skip': + if validation not in XSD_VALIDATION_MODES: + validation = self.validation + if validation == 'skip': return if is_etree_element(elem): @@ -144,13 +155,16 @@ class XsdValidator(object): error.elem = elem error.source = getattr(self, 'source', None) elif isinstance(error, Exception): - error = XMLSchemaParseError(self, unicode_type(error).strip('\'" '), elem) + message = unicode_type(error).strip() + if message[0] in '\'"' and message[0] == message[-1]: + message = message.strip('\'"') + error = XMLSchemaParseError(self, message, elem) elif isinstance(error, string_base_type): error = XMLSchemaParseError(self, error, elem) else: raise XMLSchemaValueError("'error' argument must be an exception or a string, not %r." % error) - if self.validation == 'lax': + if validation == 'lax': self.errors.append(error) else: raise error @@ -196,10 +210,11 @@ class XsdComponent(XsdValidator): """ _REGEX_SPACE = re.compile(r'\s') _REGEX_SPACES = re.compile(r'\s+') - _admitted_tags = () + _ADMITTED_TAGS = () parent = None name = None + ref = None qualified = True def __init__(self, elem, schema, parent=None, name=None): @@ -217,11 +232,11 @@ class XsdComponent(XsdValidator): if name == "elem": if not is_etree_element(value): raise XMLSchemaTypeError("%r attribute must be an Etree Element: %r" % (name, value)) - elif value.tag not in self._admitted_tags: + elif value.tag not in self._ADMITTED_TAGS: raise XMLSchemaValueError( "wrong XSD element %r for %r, must be one of %r." 
% ( local_name(value.tag), self, - [local_name(tag) for tag in self._admitted_tags] + [local_name(tag) for tag in self._ADMITTED_TAGS] ) ) super(XsdComponent, self).__setattr__(name, value) @@ -236,10 +251,19 @@ class XsdComponent(XsdValidator): super(XsdComponent, self).__setattr__(name, value) @property + def xsd_version(self): + return self.schema.XSD_VERSION + def is_global(self): - """Is `True` if the instance is a global component, `False` if it's local.""" + """Returns `True` if the instance is a global component, `False` if it's local.""" return self.parent is None + def is_override(self): + """Returns `True` if the instance is an override of a global component.""" + if self.parent is not None: + return False + return any(self.elem in x for x in self.schema.root if x.tag == XSD_OVERRIDE) + @property def schema_elem(self): """The reference element of the schema for the component instance.""" @@ -270,9 +294,26 @@ class XsdComponent(XsdValidator): """Property that references to schema's global maps.""" return self.schema.maps + @property + def any_type(self): + """Property that references to the xs:anyType instance of the global maps.""" + return self.schema.maps.types[XSD_ANY_TYPE] + + @property + def any_simple_type(self): + """Property that references to the xs:anySimpleType instance of the global maps.""" + return self.schema.maps.types[XSD_ANY_SIMPLE_TYPE] + + @property + def any_atomic_type(self): + """Property that references to the xs:anyAtomicType instance of the global maps.""" + return self.schema.maps.types[XSD_ANY_ATOMIC_TYPE] + def __repr__(self): if self.name is None: return '<%s at %#x>' % (self.__class__.__name__, id(self)) + elif self.ref is not None: + return '%s(ref=%r)' % (self.__class__.__name__, self.prefixed_name) else: return '%s(name=%r)' % (self.__class__.__name__, self.prefixed_name) @@ -286,56 +327,87 @@ class XsdComponent(XsdValidator): except (TypeError, IndexError): self.annotation = None - def _parse_component(self, elem, 
required=True, strict=True): - try: - return get_xsd_component(elem, required, strict) - except XMLSchemaValueError as err: - self.parse_error(err, elem) - - def _iterparse_components(self, elem, start=0): - try: - for obj in iter_xsd_components(elem, start): - yield obj - except XMLSchemaValueError as err: - self.parse_error(err, elem) - - def _parse_attribute(self, elem, name, values, default=None): - value = elem.get(name, default) - if value not in values: - self.parse_error("wrong value {} for {} attribute.".format(value, name)) - return default - return value - - def _parse_properties(self, *properties): - for name in properties: + def _parse_reference(self): + """ + Helper method for referable components. Returns `True` if a valid reference QName + is found without any error, otherwise returns `None`. Sets an id-related name for + the component ('nameless_') if both the attributes 'ref' and + 'name' are missing. + """ + ref = self.elem.get('ref') + if ref is None: + if 'name' in self.elem.attrib: + return + elif self.parent is None: + self.parse_error("missing attribute 'name' in a global %r" % type(self)) + else: + self.parse_error("missing both attributes 'name' and 'ref' in local %r" % type(self)) + self.name = 'nameless_%s' % str(id(self)) + elif self.parent is None: + self.parse_error("attribute 'ref' not allowed in a global %r" % type(self)) + elif 'name' in self.elem.attrib: + self.parse_error("attributes 'name' and 'ref' are mutually exclusive") + else: try: - getattr(self, name) - except (ValueError, TypeError) as err: - self.parse_error(str(err)) + self.name = self.schema.resolve_qname(ref) + except (KeyError, ValueError, RuntimeError) as err: + self.parse_error(err) + else: + if self._parse_child_component(self.elem) is not None: + self.parse_error("a reference component cannot has child definitions/declarations") + return True + + def _parse_boolean_attribute(self, name): + try: + value = self.elem.attrib[name].strip() + except KeyError: + return 
+ else: + if value in ('true', '1'): + return True + elif value in ('false', '0'): + return False + else: + self.parse_error("wrong value %r for boolean attribute %r" % (value, name)) + + def _parse_child_component(self, elem, strict=True): + child = None + for index, child in enumerate(filter(lambda x: x.tag != XSD_ANNOTATION, elem)): + if not strict: + return child + elif index: + msg = "too many XSD components, unexpected {!r} found at position {}" + self.parse_error(msg.format(child, index), elem) + return child def _parse_target_namespace(self): """ XSD 1.1 targetNamespace attribute in elements and attributes declarations. """ - self._target_namespace = self.elem.get('targetNamespace') - if self._target_namespace is not None: - if 'name' not in self.elem.attrib: - self.parse_error("attribute 'name' must be present when 'targetNamespace' attribute is provided") - if 'form' in self.elem.attrib: - self.parse_error("attribute 'form' must be absent when 'targetNamespace' attribute is provided") - if self.elem.attrib['targetNamespace'].strip() != self.schema.target_namespace: - parent = self.parent - if parent is None: - self.parse_error("a global attribute must has the same namespace as its parent schema") - elif not isinstance(parent, XsdType) or not parent.is_complex() or parent.derivation != 'restriction': - self.parse_error("a complexType restriction required for parent, found %r" % self.parent) - elif self.parent.base_type.name == XSD_ANY_TYPE: - pass + if 'targetNamespace' not in self.elem.attrib: + return - elif self.qualified: - self._target_namespace = self.schema.target_namespace + self._target_namespace = self.elem.attrib['targetNamespace'].strip() + if 'name' not in self.elem.attrib: + self.parse_error("attribute 'name' must be present when 'targetNamespace' attribute is provided") + if 'form' in self.elem.attrib: + self.parse_error("attribute 'form' must be absent when 'targetNamespace' attribute is provided") + if self._target_namespace != 
self.schema.target_namespace: + if self.parent is None: + self.parse_error("a global attribute must has the same namespace as its parent schema") + + xsd_type = self.get_parent_type() + if xsd_type and xsd_type.parent is None and \ + (xsd_type.derivation != 'restriction' or xsd_type.base_type is self.any_type): + self.parse_error("a declaration contained in a global complexType " + "must has the same namespace as its parent schema") + + if not self._target_namespace and self.name[0] == '{': + self.name = local_name(self.name) + elif self.name[0] != '{': + self.name = '{%s}%s' % (self._target_namespace, self.name) else: - self._target_namespace = '' + self.name = '{%s}%s' % (self._target_namespace, local_name(self.name)) @property def local_name(self): @@ -362,13 +434,15 @@ class XsdComponent(XsdValidator): def built(self): raise NotImplementedError - def is_matching(self, name, default_namespace=None): + def is_matching(self, name, default_namespace=None, **kwargs): """ - Returns `True` if the component name is matching the name provided as argument, `False` otherwise. + Returns `True` if the component name is matching the name provided as argument, + `False` otherwise. For XSD elements the matching is extended to substitutes. :param name: a local or fully-qualified name. :param default_namespace: used if it's not None and not empty for completing the name \ argument in case it's a local name. + :param kwargs: additional options that can be used by certain components. 
""" if not name: return self.name == name @@ -380,9 +454,9 @@ class XsdComponent(XsdValidator): qname = '{%s}%s' % (default_namespace, name) return self.qualified_name == qname or not self.qualified and self.local_name == name - def match(self, name, default_namespace=None): + def match(self, name, default_namespace=None, **kwargs): """Returns the component if its name is matching the name provided as argument, `None` otherwise.""" - return self if self.is_matching(name, default_namespace) else None + return self if self.is_matching(name, default_namespace, **kwargs) else None def get_global(self): """Returns the global XSD component that contains the component instance.""" @@ -394,6 +468,17 @@ class XsdComponent(XsdValidator): return component component = component.parent + def get_parent_type(self): + """ + Returns the nearest XSD type that contains the component instance, + or `None` if the component doesn't have an XSD type parent. + """ + component = self.parent + while component is not self and component is not None: + if isinstance(component, XsdType): + return component + component = component.parent + def iter_components(self, xsd_classes=None): """ Creates an iterator for XSD subcomponents. @@ -433,28 +518,31 @@ class XsdComponent(XsdValidator): class XsdAnnotation(XsdComponent): """ - Class for XSD 'annotation' definitions. + Class for XSD *annotation* definitions. - - Content: (appinfo | documentation)* - + :ivar appinfo: a list containing the xs:appinfo children. + :ivar documentation: a list containing the xs:documentation children. - - Content: ({any})* - + .. + Content: (appinfo | documentation)* + - - Content: ({any})* - + .. + Content: ({any})* + + + .. 
+ Content: ({any})* + """ - _admitted_tags = {XSD_ANNOTATION} + _ADMITTED_TAGS = {XSD_ANNOTATION} @property def built(self): @@ -478,7 +566,10 @@ class XsdAnnotation(XsdComponent): class XsdType(XsdComponent): + """Common base class for XSD types.""" + abstract = False + block = None base_type = None derivation = None redefine = None @@ -492,33 +583,6 @@ class XsdType(XsdComponent): def built(self): raise NotImplementedError - @staticmethod - def is_simple(): - raise NotImplementedError - - @staticmethod - def is_complex(): - raise NotImplementedError - - @staticmethod - def is_atomic(): - return None - - def is_empty(self): - raise NotImplementedError - - def is_emptiable(self): - raise NotImplementedError - - def has_simple_content(self): - raise NotImplementedError - - def has_mixed_content(self): - raise NotImplementedError - - def is_element_only(self): - raise NotImplementedError - @property def content_type_label(self): if self.is_empty(): @@ -532,12 +596,98 @@ class XsdType(XsdComponent): else: return 'unknown' + @property + def root_type(self): + """The root type of the type definition hierarchy. 
Is itself for a root type.""" + if self.base_type is None: + return self # Note that a XsdUnion type is always considered a root type + + try: + if self.base_type.is_simple(): + return self.base_type.primitive_type + else: + return self.base_type.content_type.primitive_type + except AttributeError: + # The type has complex or XsdList content + return self.base_type + + @staticmethod + def is_simple(): + """Returns `True` if the instance is a simpleType, `False` otherwise.""" + raise NotImplementedError + + @staticmethod + def is_complex(): + """Returns `True` if the instance is a complexType, `False` otherwise.""" + raise NotImplementedError + + @staticmethod + def is_atomic(): + """Returns `True` if the instance is an atomic simpleType, `False` otherwise.""" + return False + + @staticmethod + def is_datetime(): + """ + Returns `True` if the instance is a datetime/duration XSD builtin-type, `False` otherwise. + """ + return False + + def is_empty(self): + """Returns `True` if the instance has an empty value or content, `False` otherwise.""" + raise NotImplementedError + + def is_emptiable(self): + """Returns `True` if the instance has an emptiable value or content, `False` otherwise.""" + raise NotImplementedError + + def has_simple_content(self): + """ + Returns `True` if the instance is a simpleType or a complexType with simple + content, `False` otherwise. + """ + raise NotImplementedError + + def has_mixed_content(self): + """ + Returns `True` if the instance is a complexType with mixed content, `False` otherwise. + """ + raise NotImplementedError + + def is_element_only(self): + """ + Returns `True` if the instance is a complexType with element-only content, `False` otherwise. + """ + raise NotImplementedError + def is_derived(self, other, derivation=None): raise NotImplementedError + def is_blocked(self, xsd_element): + """ + Returns `True` if the base type derivation is blocked, `False` otherwise. 
+ """ + xsd_type = xsd_element.type + if self is xsd_type: + return False + + block = ('%s %s' % (xsd_element.block, xsd_type.block)).strip() + if not block: + return False + block = {x for x in block.split() if x in ('extension', 'restriction')} + + return any(self.is_derived(xsd_type, derivation) for derivation in block) + + def is_dynamic_consistent(self, other): + return self.is_derived(other) or hasattr(other, 'member_types') and \ + any(self.is_derived(mt) for mt in other.member_types) + def is_key(self): return self.name == XSD_ID or self.is_derived(self.maps.types[XSD_ID]) + def text_decode(self, text): + raise NotImplementedError + class ValidationMixin(object): """ @@ -798,6 +948,14 @@ class ParticleMixin(object): else: return self.max_occurs <= other.max_occurs + @property + def effective_min_occurs(self): + return self.min_occurs + + @property + def effective_max_occurs(self): + return self.max_occurs + ### # Methods used by XSD components def parse_error(self, *args, **kwargs): diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 6b8f0d2..8a215da 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -13,16 +13,18 @@ This module defines a mixin class for enabling XPath on schemas. """ from __future__ import unicode_literals from abc import abstractmethod -from elementpath import XPath2Parser, XPathContext +from elementpath import XPath2Parser, XPathSchemaContext, AbstractSchemaProxy from .compat import Sequence from .qnames import XSD_SCHEMA +from .namespaces import XSD_NAMESPACE +from .exceptions import XMLSchemaValueError, XMLSchemaTypeError -class ElementPathContext(XPathContext): +class XMLSchemaContext(XPathSchemaContext): """ - XPath dynamic context class for XMLSchema. Implements safe iteration methods for - schema elements that recognize circular references. + XPath dynamic context class for *xmlschema* library. Implements safe iteration + methods for schema elements that recognize circular references. 
""" def _iter_descendants(self): def safe_iter_descendants(context): @@ -34,7 +36,7 @@ class ElementPathContext(XPathContext): if len(elem): context.size = len(elem) for context.position, context.item in enumerate(elem): - if context.item.is_global: + if context.item.parent is None: for item in safe_iter_descendants(context): yield item elif getattr(context.item, 'ref', None) is not None: @@ -62,7 +64,7 @@ class ElementPathContext(XPathContext): if len(elem): context.size = len(elem) for context.position, context.item in enumerate(elem): - if context.item.is_global: + if context.item.parent is None: for item in safe_iter_context(context): yield item elif getattr(context.item, 'ref', None) is not None: @@ -76,6 +78,96 @@ class ElementPathContext(XPathContext): return safe_iter_context(self) +class XMLSchemaProxy(AbstractSchemaProxy): + """XPath schema proxy for the *xmlschema* library.""" + def __init__(self, schema=None, base_element=None): + if schema is None: + from xmlschema import XMLSchema + schema = XMLSchema.meta_schema + super(XMLSchemaProxy, self).__init__(schema, base_element) + + if base_element is not None: + try: + if base_element.schema is not schema: + raise XMLSchemaValueError("%r is not an element of %r" % (base_element, schema)) + except AttributeError: + raise XMLSchemaTypeError("%r is not an XsdElement" % base_element) + + def bind_parser(self, parser): + if parser.schema is not self: + parser.schema = self + + try: + parser.symbol_table = self._schema.xpath_tokens[parser.__class__] + except KeyError: + parser.symbol_table = parser.__class__.symbol_table.copy() + self._schema.xpath_tokens[parser.__class__] = parser.symbol_table + for xsd_type in self.iter_atomic_types(): + parser.schema_constructor(xsd_type.name) + + parser.tokenizer = parser.create_tokenizer(parser.symbol_table) + + def get_context(self): + return XMLSchemaContext(root=self._schema, item=self._base_element) + + def get_type(self, qname): + try: + return 
self._schema.maps.types[qname] + except KeyError: + return None + + def get_attribute(self, qname): + try: + return self._schema.maps.attributes[qname] + except KeyError: + return None + + def get_element(self, qname): + try: + return self._schema.maps.elements[qname] + except KeyError: + return None + + def get_substitution_group(self, qname): + try: + return self._schema.maps.substitution_groups[qname] + except KeyError: + return None + + def find(self, path, namespaces=None): + return self._schema.find(path, namespaces) + + def is_instance(self, obj, type_qname): + xsd_type = self._schema.maps.types[type_qname] + try: + xsd_type.encode(obj) + except ValueError: + return False + else: + return True + + def cast_as(self, obj, type_qname): + xsd_type = self._schema.maps.types[type_qname] + return xsd_type.decode(obj) + + def iter_atomic_types(self): + for xsd_type in self._schema.maps.types.values(): + if xsd_type.target_namespace != XSD_NAMESPACE and hasattr(xsd_type, 'primitive_type'): + yield xsd_type + + def get_primitive_type(self, xsd_type): + if not xsd_type.is_simple(): + return self._schema.maps.types['{%s}anyType' % XSD_NAMESPACE] + elif not hasattr(xsd_type, 'primitive_type'): + if xsd_type.base_type is None: + return xsd_type + return self.get_primitive_type(xsd_type.base_type) + elif xsd_type.primitive_type is not xsd_type: + return self.get_primitive_type(xsd_type.primitive_type) + else: + return xsd_type + + class ElementPathMixin(Sequence): """ Mixin abstract class for enabling ElementTree and XPath API on XSD components. @@ -83,12 +175,19 @@ class ElementPathMixin(Sequence): :cvar text: The Element text. Its value is always `None`. For compatibility with the ElementTree API. :cvar tail: The Element tail. Its value is always `None`. For compatibility with the ElementTree API. 
""" - _attrib = {} text = None tail = None + attributes = {} namespaces = {} xpath_default_namespace = None + _xpath_parser = None # Internal XPath 2.0 parser, instantiated at first use. + + def __getstate__(self): + state = self.__dict__.copy() + state.pop('_xpath_parser', None) + return state + @abstractmethod def __iter__(self): pass @@ -113,48 +212,62 @@ class ElementPathMixin(Sequence): @property def attrib(self): """Returns the Element attributes. For compatibility with the ElementTree API.""" - return getattr(self, 'attributes', self._attrib) + return self.attributes def get(self, key, default=None): """Gets an Element attribute. For compatibility with the ElementTree API.""" - return self.attrib.get(key, default) + return self.attributes.get(key, default) - def iterfind(self, path, namespaces=None): - """ - Creates and iterator for all XSD subelements matching the path. + @property + def xpath_proxy(self): + """Returns an XPath proxy instance bound with the schema.""" + raise NotImplementedError - :param path: an XPath expression that considers the XSD component as the root element. - :param namespaces: is an optional mapping from namespace prefix to full name. - :return: an iterable yielding all matching XSD subelements in document order. + def _rebind_xpath_parser(self): + """Rebind XPath 2 parser with schema component.""" + if self._xpath_parser is not None: + self._xpath_parser.schema.bind_parser(self._xpath_parser) + + def _get_xpath_namespaces(self, namespaces=None): """ + Returns a dictionary with namespaces for XPath selection. + + :param namespaces: an optional map from namespace prefix to namespace URI. \ + If this argument is not provided the schema's namespaces are used. 
+ """ + if namespaces is None: + namespaces = {k: v for k, v in self.namespaces.items() if k} + namespaces[''] = self.xpath_default_namespace + elif '' not in namespaces: + namespaces[''] = self.xpath_default_namespace + + xpath_namespaces = XPath2Parser.DEFAULT_NAMESPACES.copy() + xpath_namespaces.update(namespaces) + return xpath_namespaces + + def _xpath_parse(self, path, namespaces=None): path = path.strip() if path.startswith('/') and not path.startswith('//'): path = ''.join(['/', XSD_SCHEMA, path]) - if namespaces is None: - namespaces = {k: v for k, v in self.namespaces.items() if k} - parser = XPath2Parser(namespaces, strict=False, default_namespace=self.xpath_default_namespace) - root_token = parser.parse(path) - context = ElementPathContext(self) - return root_token.select(context) + namespaces = self._get_xpath_namespaces(namespaces) + if self._xpath_parser is None: + self._xpath_parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy) + else: + self._xpath_parser.namespaces = namespaces + + return self._xpath_parser.parse(path) def find(self, path, namespaces=None): """ Finds the first XSD subelement matching the path. :param path: an XPath expression that considers the XSD component as the root element. - :param namespaces: an optional mapping from namespace prefix to full name. + :param namespaces: an optional mapping from namespace prefix to namespace URI. :return: The first matching XSD subelement or ``None`` if there is not match. 
""" - path = path.strip() - if path.startswith('/') and not path.startswith('//'): - path = ''.join(['/', XSD_SCHEMA, path]) - if namespaces is None: - namespaces = {k: v for k, v in self.namespaces.items() if k} - parser = XPath2Parser(namespaces, strict=False, default_namespace=self.xpath_default_namespace) - root_token = parser.parse(path) - context = ElementPathContext(self) - return next(root_token.select(context), None) + context = XMLSchemaContext(self) + return next(self._xpath_parse(path, namespaces).select_results(context), None) def findall(self, path, namespaces=None): """ @@ -165,16 +278,19 @@ class ElementPathMixin(Sequence): :return: a list containing all matching XSD subelements in document order, an empty \ list is returned if there is no match. """ - path = path.strip() - if path.startswith('/') and not path.startswith('//'): - path = ''.join(['/', XSD_SCHEMA, path]) - if namespaces is None: - namespaces = {k: v for k, v in self.namespaces.items() if k} + context = XMLSchemaContext(self) + return self._xpath_parse(path, namespaces).get_results(context) - parser = XPath2Parser(namespaces, strict=False, default_namespace=self.xpath_default_namespace) - root_token = parser.parse(path) - context = ElementPathContext(self) - return root_token.get_results(context) + def iterfind(self, path, namespaces=None): + """ + Creates and iterator for all XSD subelements matching the path. + + :param path: an XPath expression that considers the XSD component as the root element. + :param namespaces: is an optional mapping from namespace prefix to full name. + :return: an iterable yielding all matching XSD subelements in document order. 
+ """ + context = XMLSchemaContext(self) + return self._xpath_parse(path, namespaces).select_results(context) def iter(self, tag=None): """ @@ -187,7 +303,7 @@ class ElementPathMixin(Sequence): if tag is None or elem.is_matching(tag): yield elem for child in elem: - if child.is_global: + if child.parent is None: for e in safe_iter(child): yield e elif getattr(child, 'ref', None) is not None: