From 4085e8daa5600fd880cdc02b768dcfe3d6f91fcd Mon Sep 17 00:00:00 2001
From: Daniel Hillier <daniel.hillier@gmail.com>
Date: Fri, 5 Jul 2019 00:06:34 +1000
Subject: [PATCH 01/34] Add failing test when reloading xml from zipfile
 content.

This problem comes from the way `XMLResource` re-opens files when it needs
to read more data from them. `XMLResource` derives the source location of
file-like objects from their `name` attribute. When that attribute doesn't
correspond to a file on disk (zipfile contents, Django files), an error is
raised when `XMLResource.open()` is called.
---
 xmlschema/tests/test_resources.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 038e05f..4022c08 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -15,6 +15,7 @@ This module runs tests concerning resources.
 import unittest
 import os
 import platform
+import zipfile
 
 try:
     from pathlib import PureWindowsPath, PurePath
@@ -26,7 +27,7 @@ from xmlschema import (
     load_xml_resource, XMLResource, XMLSchemaURLError
 )
 from xmlschema.tests import XMLSchemaTestCase, SKIP_REMOTE_TESTS
-from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
+from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO, BytesIO
 from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, is_etree_element, etree_element, py_etree_element
 
 
@@ -383,6 +384,27 @@ class TestResources(XMLSchemaTestCase):
         for schema in vh_schema.maps.iter_schemas():
             self.assertIsInstance(schema.root, etree_element)
 
+    def test_fid_with_name_attr(self):
+        """XMLResource gets correct data when passed a file like object
+        with a name attribute that isn't on disk.
+
+        These file descriptors appear when working with the contents from a
+        zip using the zipfile module and with Django files in some
+        instances.
+        """
+        zipname = "not__on____disk.xml"
+        bytes_fid = BytesIO()
+        with zipfile.ZipFile(bytes_fid, 'w') as zf:
+            with open(self.vh_xml_file) as fid:
+                zf.writestr(zipname, fid.read())
+
+        bytes_fid.seek(0)
+        with zipfile.ZipFile(bytes_fid) as zf:
+            with zf.open(zipname) as fid:
+                resource = XMLResource(fid)
+                # This should not cause an error.
+                resource.load()
+
 
 if __name__ == '__main__':
     from xmlschema.tests import print_test_header

From 61e1f609fcfe1b4acb44d75c416227ef9b56474d Mon Sep 17 00:00:00 2001
From: Daniel Hillier <daniel.hillier@gmail.com>
Date: Fri, 5 Jul 2019 22:23:28 +1000
Subject: [PATCH 02/34] Stop reading `name` and `url` from file object attrs

These attrs shouldn't be used to reopen the file object because:
- They may not reflect the original file or resource (file objects
  opened from a zipfile will have a name that doesn't correspond to any
  file on disk).
- Depending on how the file object was opened, these attrs could be
  crafted to read arbitrary files from disk. If the creator of a .zip
  gives a file inside the zip file a path of `/etc/passwd`, we may end
  up opening that file.

Instead of reopening the file, we keep track of the file object and seek
to the beginning of the file. This means that (for most operations) the
file object must be seekable. On Python 2, urlopen returns an unseekable
object for 'file://' paths, so one test had to be skipped on Python 2 for
this reason.
---
 xmlschema/resources.py            | 98 ++++++++++++++++++-------------
 xmlschema/tests/test_resources.py | 61 +++++++++++++------
 2 files changed, 100 insertions(+), 59 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index b2898f7..be9f1e8 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -245,7 +245,7 @@ class XMLResource(object):
         if base_url is not None and not isinstance(base_url, string_base_type):
             raise XMLSchemaValueError(u"'base_url' argument has to be a string: {!r}".format(base_url))
 
-        self._root = self._document = self._url = self._text = None
+        self._root = self._document = self._url = self._text = self._fid = None
         self._base_url = base_url
         self.defuse = defuse
         self.timeout = timeout
@@ -274,7 +274,7 @@ class XMLResource(object):
 
     def __setattr__(self, name, value):
         if name == 'source':
-            self._root, self._document, self._text, self._url = self._fromsource(value)
+            self._root, self._document, self._text, self._url, self._fid = self._fromsource(value)
         elif name == 'defuse' and value not in DEFUSE_MODES:
             raise XMLSchemaValueError(u"'defuse' attribute: {!r} is not a defuse mode.".format(value))
         elif name == 'timeout' and (not isinstance(value, int) or value <= 0):
@@ -287,16 +287,16 @@ class XMLResource(object):
         url, lazy = None, self._lazy
         if is_etree_element(source):
             self._lazy = False
-            return source, None, None, None  # Source is already an Element --> nothing to load
+            return source, None, None, None, None  # Source is already an Element --> nothing to load
         elif isinstance(source, string_base_type):
             _url, self._url = self._url, None
             try:
                 if lazy:
                     # check if source is a string containing a valid XML root
                     for _, root in self.iterparse(StringIO(source), events=('start',)):
-                        return root, None, source, None
+                        return root, None, source, None, None
                 else:
-                    return self.fromstring(source), None, source, None
+                    return self.fromstring(source), None, source, None, None
             except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError):
                 if '\n' in source:
                     raise
@@ -309,33 +309,24 @@ class XMLResource(object):
             try:
                 if lazy:
                     for _, root in self.iterparse(source, events=('start',)):
-                        return root, None, source.getvalue(), None
+                        return root, None, source.getvalue(), None, None
                 else:
                     document = self.parse(source)
-                    return document.getroot(), document, source.getvalue(), None
+                    return document.getroot(), document, source.getvalue(), None, None
             finally:
                 self._url = _url
 
         elif hasattr(source, 'read'):
-            # source should be a file-like object
+            _url, self._url = self._url, url
             try:
-                if hasattr(source, 'url'):
-                    url = source.url
+                if lazy:
+                    for _, root in self.iterparse(source, events=('start',)):
+                        return root, None, None, url, source
                 else:
-                    url = normalize_url(source.name)
-            except AttributeError:
-                pass
-            else:
-                _url, self._url = self._url, url
-                try:
-                    if lazy:
-                        for _, root in self.iterparse(source, events=('start',)):
-                            return root, None, None, url
-                    else:
-                        document = self.parse(source)
-                        return document.getroot(), document, None, url
-                finally:
-                    self._url = _url
+                    document = self.parse(source)
+                    return document.getroot(), document, None, url, source
+            finally:
+                self._url = _url
 
         else:
             # Try ElementTree object at last
@@ -346,7 +337,7 @@ class XMLResource(object):
             else:
                 if is_etree_element(root):
                     self._lazy = False
-                    return root, source, None, None
+                    return root, source, None, None, None
 
         if url is None:
             raise XMLSchemaTypeError(
@@ -359,11 +350,11 @@ class XMLResource(object):
             try:
                 if lazy:
                     for _, root in self.iterparse(resource, events=('start',)):
-                        return root, None, None, url
+                        return root, None, None, url, None
                 else:
                     document = self.parse(resource)
                     root = document.getroot()
-                    return root, document, None, url
+                    return root, document, None, url, None
             finally:
                 self._url = _url
                 resource.close()
@@ -482,6 +473,10 @@ class XMLResource(object):
 
     def open(self):
         """Returns a opened resource reader object for the instance URL."""
+        if self._fid is not None:
+            self._fid.seek(0)
+            return self._fid
+
         if self._url is None:
             raise XMLSchemaValueError("can't open, the resource has no URL associated.")
         try:
@@ -494,7 +489,7 @@ class XMLResource(object):
         Loads the XML text from the data source. If the data source is an Element
         the source XML text can't be retrieved.
         """
-        if self._url is None:
+        if self._url is None and self._fid is None:
             return  # Created from Element or text source --> already loaded
 
         resource = self.open()
@@ -503,16 +498,25 @@ class XMLResource(object):
         except (OSError, IOError) as err:
             raise XMLSchemaOSError("cannot load data from %r: %s" % (self._url, err))
         finally:
-            resource.close()
+            # We don't want to close the file obj if it wasn't originally
+            # opened by `XMLResource`. That is the concern of the code
+            # where the file obj came from.
+            if self._fid is None:
+                resource.close()
 
-        try:
-            self._text = data.decode('utf-8') if PY3 else data.encode('utf-8')
-        except UnicodeDecodeError:
-            if PY3:
-                self._text = data.decode('iso-8859-1')
-            else:
-                with codecs.open(urlsplit(self._url).path, mode='rb', encoding='iso-8859-1') as f:
-                    self._text = f.read().encode('iso-8859-1')
+        if isinstance(data, bytes):
+            try:
+                text = data.decode('utf-8') if PY3 else data.encode('utf-8')
+            except UnicodeDecodeError:
+                if PY3:
+                    text = data.decode('iso-8859-1')
+                else:
+                    with codecs.open(urlsplit(self._url).path, mode='rb', encoding='iso-8859-1') as f:
+                        text = f.read().encode('iso-8859-1')
+        else:
+            text = data
+
+        self._text = text
 
     def is_lazy(self):
         """Returns `True` if the XML resource is lazy."""
@@ -528,6 +532,9 @@ class XMLResource(object):
             for elem in self._root.iter(tag):
                 yield elem
             return
+        elif self._fid is not None:
+            self._fid.seek(0)
+            resource = self._fid
         elif self._url is not None:
             resource = urlopen(self._url, timeout=self.timeout)
         else:
@@ -539,7 +546,8 @@ class XMLResource(object):
                     yield elem
                 elem.clear()
         finally:
-            resource.close()
+            if self._fid is None:
+                resource.close()
 
     def iterfind(self, path=None, namespaces=None):
         """XML resource tree iterfind selector."""
@@ -550,6 +558,9 @@ class XMLResource(object):
                 for e in iter_select(self._root, path, namespaces, strict=False):
                     yield e
             return
+        elif self._fid is not None:
+            self._fid.seek(0)
+            resource = self._fid
         elif self._url is not None:
             resource = urlopen(self._url, timeout=self.timeout)
         else:
@@ -587,7 +598,8 @@ class XMLResource(object):
                         elif level == 0:
                             elem.clear()
         finally:
-            resource.close()
+            if self._fid is None:
+                resource.close()
 
     def iter_location_hints(self):
         """Yields schema location hints from the XML tree."""
@@ -639,7 +651,7 @@ class XMLResource(object):
         local_root = self.root.tag[0] != '{'
         nsmap = {}
 
-        if self._url is not None:
+        if self._url is not None or self._fid is not None:
             resource = self.open()
             try:
                 for event, node in self.iterparse(resource, events=('start-ns', 'end')):
@@ -650,7 +662,11 @@ class XMLResource(object):
             except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError):
                 pass
             finally:
-                resource.close()
+                # We don't want to close the file obj if it wasn't
+                # originally opened by `XMLResource`. That is the concern
+                # of the code where the file obj came from.
+                if self._fid is None:
+                    resource.close()
         elif isinstance(self._text, string_base_type):
             try:
                 for event, node in self.iterparse(StringIO(self._text), events=('start-ns', 'end')):
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 4022c08..b259c5c 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -15,7 +15,6 @@ This module runs tests concerning resources.
 import unittest
 import os
 import platform
-import zipfile
 
 try:
     from pathlib import PureWindowsPath, PurePath
@@ -27,7 +26,7 @@ from xmlschema import (
     load_xml_resource, XMLResource, XMLSchemaURLError
 )
 from xmlschema.tests import XMLSchemaTestCase, SKIP_REMOTE_TESTS
-from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO, BytesIO
+from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
 from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, is_etree_element, etree_element, py_etree_element
 
 
@@ -196,17 +195,22 @@ class TestResources(XMLSchemaTestCase):
         resource.load()
         self.assertIsNone(resource.text)
 
+    @unittest.skipIf(
+        platform.python_version_tuple()[0] < '3',
+        "Skip: urlopen on Python 2 can't seek 'file://' paths."
+    )
     def test_xml_resource_from_resource(self):
         xml_file = urlopen('file://{}'.format(add_leading_slash(self.vh_xml_file)))
         try:
             resource = XMLResource(xml_file)
             self.assertEqual(resource.source, xml_file)
             self.assertEqual(resource.root.tag, '{http://example.com/vehicles}vehicles')
-            self.check_url(resource.url, self.vh_xml_file)
+            self.assertIsNone(resource.url)
             self.assertIsNone(resource.document)
             self.assertIsNone(resource.text)
             resource.load()
             self.assertTrue(resource.text.startswith('<?xml'))
+            self.assertFalse(xml_file.closed)
         finally:
             xml_file.close()
 
@@ -215,21 +219,35 @@ class TestResources(XMLSchemaTestCase):
             resource = XMLResource(schema_file)
             self.assertEqual(resource.source, schema_file)
             self.assertEqual(resource.root.tag, '{http://www.w3.org/2001/XMLSchema}schema')
-            self.check_url(resource.url, self.vh_xsd_file)
+            self.assertIsNone(resource.url)
             self.assertIsNone(resource.document)
             self.assertIsNone(resource.text)
             resource.load()
             self.assertTrue(resource.text.startswith('<xs:schema'))
+            self.assertFalse(schema_file.closed)
+            for _ in resource.iter():
+                pass
+            self.assertFalse(schema_file.closed)
+            for _ in resource.iterfind():
+                pass
+            self.assertFalse(schema_file.closed)
 
         with open(self.vh_xsd_file) as schema_file:
             resource = XMLResource(schema_file, lazy=False)
             self.assertEqual(resource.source, schema_file)
             self.assertEqual(resource.root.tag, '{http://www.w3.org/2001/XMLSchema}schema')
-            self.check_url(resource.url, self.vh_xsd_file)
+            self.assertIsNone(resource.url)
             self.assertIsInstance(resource.document, ElementTree.ElementTree)
             self.assertIsNone(resource.text)
             resource.load()
             self.assertTrue(resource.text.startswith('<xs:schema'))
+            self.assertFalse(schema_file.closed)
+            for _ in resource.iter():
+                pass
+            self.assertFalse(schema_file.closed)
+            for _ in resource.iterfind():
+                pass
+            self.assertFalse(schema_file.closed)
 
     def test_xml_resource_from_string(self):
         with open(self.vh_xsd_file) as schema_file:
@@ -345,13 +363,15 @@ class TestResources(XMLSchemaTestCase):
     def test_xml_resource_get_namespaces(self):
         with open(self.vh_xml_file) as schema_file:
             resource = XMLResource(schema_file)
-            self.assertEqual(resource.url, normalize_url(self.vh_xml_file))
+            self.assertIsNone(resource.url)
             self.assertEqual(set(resource.get_namespaces().keys()), {'vh', 'xsi'})
+            self.assertFalse(schema_file.closed)
 
         with open(self.vh_xsd_file) as schema_file:
             resource = XMLResource(schema_file)
-            self.assertEqual(resource.url, normalize_url(self.vh_xsd_file))
+            self.assertIsNone(resource.url)
             self.assertEqual(set(resource.get_namespaces().keys()), {'xs', 'vh'})
+            self.assertFalse(schema_file.closed)
 
         resource = XMLResource(self.col_xml_file)
         self.assertEqual(resource.url, normalize_url(self.col_xml_file))
@@ -392,18 +412,23 @@ class TestResources(XMLSchemaTestCase):
         zip using the zipfile module and with Django files in some
         instances.
         """
-        zipname = "not__on____disk.xml"
-        bytes_fid = BytesIO()
-        with zipfile.ZipFile(bytes_fid, 'w') as zf:
-            with open(self.vh_xml_file) as fid:
-                zf.writestr(zipname, fid.read())
+        class FileProxy(object):
+            def __init__(self, fid, fake_name):
+                self._fid = fid
+                self.name = fake_name
 
-        bytes_fid.seek(0)
-        with zipfile.ZipFile(bytes_fid) as zf:
-            with zf.open(zipname) as fid:
-                resource = XMLResource(fid)
-                # This should not cause an error.
-                resource.load()
+            def __getattr__(self, attr):
+                try:
+                    return self.__dict__[attr]
+                except (KeyError, AttributeError):
+                    return getattr(self.__dict__["_fid"], attr)
+
+        fake_name = "not__on____disk.xml"
+        with open(self.vh_xml_file) as schema_file:
+            resource = XMLResource(FileProxy(schema_file, fake_name))
+            self.assertIsNone(resource.url)
+            self.assertEqual(set(resource.get_namespaces().keys()), {'vh', 'xsi'})
+            self.assertFalse(schema_file.closed)
 
 
 if __name__ == '__main__':

From 8407f091392c91dd5492c30ed82ac4ca5b03779d Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Tue, 15 Oct 2019 21:40:40 +0200
Subject: [PATCH 03/34] Add LOCATION_HINTS dict to namespaces module

  - Use LOCATION_HINTS for schema's fallback locations
---
 doc/conf.py                                   |  2 +-
 publiccode.yml                                |  4 +-
 setup.py                                      |  2 +-
 xmlschema/__init__.py                         |  2 +-
 xmlschema/namespaces.py                       | 24 ++++++++++++
 .../tests/validators/test_schema_class.py     |  5 ++-
 xmlschema/validators/schema.py                | 39 +++++++------------
 7 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index c0eed6c..90a672d 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -62,7 +62,7 @@ author = 'Davide Brunato'
 # The short X.Y version.
 version = '1.0'
 # The full version, including alpha/beta/rc tags.
-release = '1.0.15'
+release = '1.0.16'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/publiccode.yml b/publiccode.yml
index 6ecb1e0..4770e55 100644
--- a/publiccode.yml
+++ b/publiccode.yml
@@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2'
 name: xmlschema
 url: 'https://github.com/sissaschool/xmlschema'
 landingURL: 'https://github.com/sissaschool/xmlschema'
-releaseDate: '2019-10-13'
-softwareVersion: v1.0.15
+releaseDate: '2019-10-XX'
+softwareVersion: v1.0.16
 developmentStatus: stable
 platforms:
   - linux
diff --git a/setup.py b/setup.py
index 603a73e..2e5472c 100755
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@ class InstallCommand(install):
 
 setup(
     name='xmlschema',
-    version='1.0.15',
+    version='1.0.16',
     install_requires=['elementpath~=1.3.0'],
     packages=['xmlschema'],
     include_package_data=True,
diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py
index e9adc67..d800a17 100644
--- a/xmlschema/__init__.py
+++ b/xmlschema/__init__.py
@@ -30,7 +30,7 @@ from .validators import (
     XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11
 )
 
-__version__ = '1.0.15'
+__version__ = '1.0.16'
 __author__ = "Davide Brunato"
 __contact__ = "brunato@sissa.it"
 __copyright__ = "Copyright 2016-2019, SISSA"
diff --git a/xmlschema/namespaces.py b/xmlschema/namespaces.py
index beff6c6..44cd453 100644
--- a/xmlschema/namespaces.py
+++ b/xmlschema/namespaces.py
@@ -12,10 +12,13 @@
 This module contains namespace definitions for W3C core standards and namespace related classes.
 """
 from __future__ import unicode_literals
+import os
 import re
 
 from .compat import MutableMapping, Mapping
 
+###
+# Namespace URIs
 XSD_NAMESPACE = 'http://www.w3.org/2001/XMLSchema'
 "URI of the XML Schema Definition namespace (xs|xsd)"
 
@@ -42,6 +45,27 @@ VC_NAMESPACE = 'http://www.w3.org/2007/XMLSchema-versioning'
 "URI of the XML Schema Versioning namespace (vc)"
 
 
+###
+# Schema location hints
+
+SCHEMAS_DIR = os.path.join(os.path.dirname(__file__), 'validators/schemas/')
+
+LOCATION_HINTS = {
+    # Locally saved schemas
+    HFP_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XMLSchema-hasFacetAndProperty_minimal.xsd'),
+    VC_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XMLSchema-versioning_minimal.xsd'),
+    XLINK_NAMESPACE: os.path.join(SCHEMAS_DIR, 'xlink.xsd'),
+    XHTML_NAMESPACE: os.path.join(SCHEMAS_DIR, 'xhtml1-strict.xsd'),
+
+    # Remote locations: contributors can propose additional official locations
+    # for other namespaces for extending this list.
+    XSLT_NAMESPACE: os.path.join(SCHEMAS_DIR, 'http://www.w3.org/2007/schema-for-xslt20.xsd'),
+}
+
+
+###
+# Helper functions and classes
+
 NAMESPACE_PATTERN = re.compile(r'{([^}]*)}')
 
 
diff --git a/xmlschema/tests/validators/test_schema_class.py b/xmlschema/tests/validators/test_schema_class.py
index fb94fe1..45be457 100644
--- a/xmlschema/tests/validators/test_schema_class.py
+++ b/xmlschema/tests/validators/test_schema_class.py
@@ -13,9 +13,11 @@ from __future__ import print_function, unicode_literals
 import unittest
 import platform
 import warnings
+import os
 
 from xmlschema import XMLSchemaParseError, XMLSchemaIncludeWarning, XMLSchemaImportWarning
 from xmlschema.etree import etree_element
+from xmlschema.namespaces import SCHEMAS_DIR
 from xmlschema.qnames import XSD_ELEMENT, XSI_TYPE
 from xmlschema.tests import SKIP_REMOTE_TESTS, XsdValidatorTestCase
 from xmlschema.validators import XMLSchema11
@@ -113,8 +115,7 @@ class TestXMLSchema10(XsdValidatorTestCase):
         </xs:simpleType>""", XMLSchemaParseError)
 
     def test_base_schemas(self):
-        from xmlschema.validators.schema import XML_SCHEMA_FILE
-        self.schema_class(XML_SCHEMA_FILE)
+        self.schema_class(os.path.join(SCHEMAS_DIR, 'xml_minimal.xsd'))
 
     def test_root_elements(self):
         # Test issue #107 fix
diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py
index 321809f..1277d26 100644
--- a/xmlschema/validators/schema.py
+++ b/xmlschema/validators/schema.py
@@ -32,8 +32,8 @@ from ..qnames import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \
     XSD_ALL, XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_INCLUDE, XSD_IMPORT, XSD_REDEFINE, \
     XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT
 from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute
-from ..namespaces import XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, XHTML_NAMESPACE, \
-    XLINK_NAMESPACE, VC_NAMESPACE, NamespaceResourcesMap, NamespaceView
+from ..namespaces import XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, VC_NAMESPACE, \
+    SCHEMAS_DIR, LOCATION_HINTS, NamespaceResourcesMap, NamespaceView
 from ..etree import etree_element, etree_tostring, prune_etree, ParseError
 from ..resources import is_remote_url, url_path_is_file, fetch_resource, XMLResource
 from ..converters import XMLSchemaConverter
@@ -75,14 +75,6 @@ ANY_ELEMENT = etree_element(
         'maxOccurs': 'unbounded'
     })
 
-# XSD schemas of W3C standards
-SCHEMAS_DIR = os.path.join(os.path.dirname(__file__), 'schemas/')
-XML_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xml_minimal.xsd')
-XSI_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-instance_minimal.xsd')
-XLINK_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xlink.xsd')
-XHTML_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'xhtml1-strict.xsd')
-VC_SCHEMA_FILE = os.path.join(SCHEMAS_DIR, 'XMLSchema-versioning_minimal.xsd')
-
 
 class XMLSchemaMeta(ABCMeta):
 
@@ -1024,14 +1016,15 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
                 warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=3)
             self.imports[namespace] = None
 
-    def import_schema(self, namespace, location, base_url=None, force=False):
+    def import_schema(self, namespace, location, base_url=None, force=False, build=False):
         """
         Imports a schema for an external namespace, from a specific URL.
 
         :param namespace: is the URI of the external namespace.
         :param location: is the URL of the schema.
         :param base_url: is an optional base URL for fetching the schema resource.
-        :param force: is set to `True` imports the schema also if the namespace is already imported.
+        :param force: if set to `True` imports the schema also if the namespace is already imported.
+        :param build: defines when to build the imported schema, the default is to not build.
         :return: the imported :class:`XMLSchema` instance.
         """
         if not force:
@@ -1058,7 +1051,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
             base_url=self.base_url,
             defuse=self.defuse,
             timeout=self.timeout,
-            build=False,
+            build=build,
         )
         if schema.target_namespace != namespace:
             raise XMLSchemaValueError('imported schema %r has an unmatched namespace %r' % (location, namespace))
@@ -1487,13 +1480,10 @@ class XMLSchema10(XMLSchemaBase):
     }
     meta_schema = os.path.join(SCHEMAS_DIR, 'XSD_1.0/XMLSchema.xsd')
     BASE_SCHEMAS = {
-        XML_NAMESPACE: XML_SCHEMA_FILE,
-        XSI_NAMESPACE: XSI_SCHEMA_FILE,
-    }
-    FALLBACK_LOCATIONS = {
-        XLINK_NAMESPACE: XLINK_SCHEMA_FILE,
-        XHTML_NAMESPACE: XHTML_SCHEMA_FILE,
+        XML_NAMESPACE: os.path.join(SCHEMAS_DIR, 'xml_minimal.xsd'),
+        XSI_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XMLSchema-instance_minimal.xsd'),
     }
+    FALLBACK_LOCATIONS = LOCATION_HINTS
 
 
 # ++++ UNDER DEVELOPMENT, DO NOT USE!!! ++++
@@ -1550,15 +1540,12 @@ class XMLSchema11(XMLSchemaBase):
     }
     meta_schema = os.path.join(SCHEMAS_DIR, 'XSD_1.1/XMLSchema.xsd')
     BASE_SCHEMAS = {
+        XML_NAMESPACE: os.path.join(SCHEMAS_DIR, 'xml_minimal.xsd'),
+        XSI_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XMLSchema-instance_minimal.xsd'),
         XSD_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XSD_1.1/xsd11-extra.xsd'),
-        XML_NAMESPACE: XML_SCHEMA_FILE,
-        XSI_NAMESPACE: XSI_SCHEMA_FILE,
-        VC_NAMESPACE: VC_SCHEMA_FILE,
-    }
-    FALLBACK_LOCATIONS = {
-        XLINK_NAMESPACE: XLINK_SCHEMA_FILE,
-        XHTML_NAMESPACE: XHTML_SCHEMA_FILE,
+        VC_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XMLSchema-versioning_minimal.xsd'),
     }
+    FALLBACK_LOCATIONS = LOCATION_HINTS
 
     def _parse_inclusions(self):
         super(XMLSchema11, self)._parse_inclusions()

From 248b9f9b6875733a73349cf61412683ec29e3fd8 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 16 Oct 2019 15:16:19 +0200
Subject: [PATCH 04/34] Update XSD validation with wildcards

  - Add load_namespace() to XsdGlobals
  - Modify iter_decode/iter_encode for xs:any and xs:anyAttribute
---
 xmlschema/validators/globals_.py  |  59 ++++++++++++-
 xmlschema/validators/wildcards.py | 132 +++++++++++++++++-------------
 2 files changed, 132 insertions(+), 59 deletions(-)

diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py
index d610324..c9716d7 100644
--- a/xmlschema/validators/globals_.py
+++ b/xmlschema/validators/globals_.py
@@ -17,7 +17,7 @@ from collections import Counter
 
 from ..compat import string_base_type
 from ..exceptions import XMLSchemaKeyError, XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaWarning
-from ..namespaces import XSD_NAMESPACE, NamespaceResourcesMap
+from ..namespaces import XSD_NAMESPACE, LOCATION_HINTS, NamespaceResourcesMap
 from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, \
     XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_GROUP, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, \
     XSD_ELEMENT, XSI_TYPE, get_qname, local_name, qname_to_extended
@@ -203,6 +203,7 @@ class XsdGlobals(XsdValidator):
 
         self.validator = validator
         self.namespaces = NamespaceResourcesMap()  # Registered schemas by namespace URI
+        self.missing_locations = []     # Missing or failing resource locations
 
         self.types = {}                 # Global types (both complex and simple)
         self.attributes = {}            # Global attributes
@@ -384,6 +385,61 @@ class XsdGlobals(XsdValidator):
             elif not any(schema.url == obj.url and schema.__class__ == obj.__class__ for obj in ns_schemas):
                 ns_schemas.append(schema)
 
+    def load_namespace(self, namespace, build=True):
+        """
+        Load namespace from available location hints. Returns `True` if the namespace
+        is already loaded or if the namespace can be loaded from one of the locations,
+        returns `False` otherwise. Failing locations are inserted into the missing
+        locations list.
+
+        :param namespace: the namespace to load.
+        :param build: if left with `True` value builds the maps after load. If the \
+        build fails the resource URL is added to missing locations.
+        """
+        namespace = namespace.strip()
+        if namespace in self.namespaces:
+            return True
+        elif self.validator.meta_schema is None:
+            return False  # Do not load additional namespaces for meta-schema (XHTML)
+
+        # Try from schemas location hints: usually the namespaces related to these
+        # hints are already loaded during schema construction, but it's better to
+        # retry once if the initial load has failed.
+        for schema in self.iter_schemas():
+            for url in schema.get_locations(namespace):
+                if url in self.missing_locations:
+                    continue
+
+                try:
+                    if schema.import_schema(namespace, url, schema.base_url) is not None:
+                        if build:
+                            self.build()
+                except (OSError, IOError):
+                    pass
+                except XMLSchemaNotBuiltError:
+                    self.clear(remove_schemas=True, only_unbuilt=True)
+                    self.missing_locations.append(url)
+                else:
+                    return True
+
+        # Try from library location hint, if there is any.
+        if namespace in LOCATION_HINTS:
+            url = LOCATION_HINTS[namespace]
+            if url not in self.missing_locations:
+                try:
+                    if self.validator.import_schema(namespace, url) is not None:
+                        if build:
+                            self.build()
+                except (OSError, IOError):
+                    return False
+                except XMLSchemaNotBuiltError:
+                    self.clear(remove_schemas=True, only_unbuilt=True)
+                    self.missing_locations.append(url)
+                else:
+                    return True
+
+        return False
+
     def clear(self, remove_schemas=False, only_unbuilt=False):
         """
         Clears the instance maps and schemas.
@@ -415,6 +471,7 @@ class XsdGlobals(XsdValidator):
                 self.namespaces = namespaces
 
         else:
+            self.missing_locations.clear()
             for global_map in self.global_maps:
                 global_map.clear()
             self.substitution_groups.clear()
diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py
index ade601b..849c22c 100644
--- a/xmlschema/validators/wildcards.py
+++ b/xmlschema/validators/wildcards.py
@@ -13,13 +13,12 @@ This module contains classes for XML Schema wildcards.
 """
 from __future__ import unicode_literals
 
+from ..compat import unicode_type
 from ..exceptions import XMLSchemaValueError
 from ..namespaces import XSI_NAMESPACE
 from ..qnames import XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_OPEN_CONTENT, \
     XSD_DEFAULT_OPEN_CONTENT, get_namespace
 from ..xpath import XMLSchemaProxy, ElementPathMixin
-
-from .exceptions import XMLSchemaNotBuiltError
 from .xsdbase import ValidationMixin, XsdComponent, ParticleMixin
 
 
@@ -129,25 +128,6 @@ class XsdWildcard(XsdComponent, ValidationMixin):
 
         self.not_qname = names
 
-    def _load_namespace(self, namespace):
-        if namespace in self.schema.maps.namespaces:
-            return
-
-        for url in self.schema.get_locations(namespace):
-            try:
-                schema = self.schema.import_schema(namespace, url, base_url=self.schema.base_url)
-                if schema is not None:
-                    try:
-                        schema.maps.build()
-                    except XMLSchemaNotBuiltError:
-                        # Namespace build fails: remove unbuilt schemas and the url hint
-                        schema.maps.clear(remove_schemas=True, only_unbuilt=True)
-                        self.schema.locations[namespace].remove(url)
-                    else:
-                        break
-            except (OSError, IOError):
-                pass
-
     @property
     def built(self):
         return True
@@ -444,48 +424,65 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin):
         return iter(())
 
     def iter_decode(self, elem, validation='lax', **kwargs):
-        if self.is_matching(elem.tag):
-            if self.process_contents == 'skip':
-                return
+        if not self.is_matching(elem.tag):
+            if validation != 'skip':
+                reason = "element %r not allowed here." % elem.tag
+                yield self.validation_error(validation, reason, elem, **kwargs)
 
-            self._load_namespace(get_namespace(elem.tag))
+        elif self.process_contents == 'skip':
+            return
+
+        elif self.maps.load_namespace(get_namespace(elem.tag)):
             try:
                 xsd_element = self.maps.lookup_element(elem.tag)
             except LookupError:
-                if kwargs.get('drop_results'):
-                    # Validation-only mode: use anyType for decode a complex element.
+                if validation == 'skip':
                     yield self.any_type.decode(elem) if len(elem) > 0 else elem.text
-                elif self.process_contents == 'strict' and validation != 'skip':
+                elif self.process_contents == 'strict':
                     reason = "element %r not found." % elem.tag
                     yield self.validation_error(validation, reason, elem, **kwargs)
             else:
                 for result in xsd_element.iter_decode(elem, validation, **kwargs):
                     yield result
-        elif validation != 'skip':
-            reason = "element %r not allowed here." % elem.tag
+
+        elif validation == 'skip':
+            yield self.any_type.decode(elem) if len(elem) > 0 else elem.text
+
+        elif self.process_contents == 'strict':
+            reason = "unavailable namespace {!r}".format(get_namespace(elem.tag))
             yield self.validation_error(validation, reason, elem, **kwargs)
 
     def iter_encode(self, obj, validation='lax', **kwargs):
-        if self.process_contents == 'skip':
-            return
-
         name, value = obj
         namespace = get_namespace(name)
 
-        if self.is_namespace_allowed(namespace):
-            self._load_namespace(namespace)
+        if not self.is_namespace_allowed(namespace):
+            if validation != 'skip':
+                reason = "element %r not allowed here." % name
+                yield self.validation_error(validation, reason, value, **kwargs)
+
+        elif self.process_contents == 'skip':
+            return
+
+        elif self.maps.load_namespace(namespace):
             try:
                 xsd_element = self.maps.lookup_element(name)
             except LookupError:
-                if self.process_contents == 'strict' and validation != 'skip':
+                if validation == 'skip':
+                    yield self.any_type.encode(value)
+                elif self.process_contents == 'strict':
                     reason = "element %r not found." % name
                     yield self.validation_error(validation, reason, **kwargs)
             else:
                 for result in xsd_element.iter_encode(value, validation, **kwargs):
                     yield result
-        elif validation != 'skip':
-            reason = "element %r not allowed here." % name
-            yield self.validation_error(validation, reason, value, **kwargs)
+
+        elif validation == 'skip':
+            yield self.any_type.encode(value)
+
+        elif self.process_contents == 'strict':
+            reason = "unavailable namespace {!r}".format(namespace)
+            yield self.validation_error(validation, reason, **kwargs)
 
     def is_overlap(self, other):
         if not isinstance(other, XsdAnyElement):
@@ -562,47 +559,66 @@ class XsdAnyAttribute(XsdWildcard):
 
     def iter_decode(self, attribute, validation='lax', **kwargs):
         name, value = attribute
-        if self.is_matching(name):
-            if self.process_contents == 'skip':
-                return
 
-            self._load_namespace(get_namespace(name))
+        if not self.is_matching(name):
+            if validation != 'skip':
+                reason = "attribute %r not allowed." % name
+                yield self.validation_error(validation, reason, attribute, **kwargs)
+
+        elif self.process_contents == 'skip':
+            return
+
+        elif self.maps.load_namespace(get_namespace(name)):
             try:
                 xsd_attribute = self.maps.lookup_attribute(name)
             except LookupError:
-                if kwargs.get('drop_results'):
-                    # Validation-only mode: returns the value if a decoder is not found.
+                if validation == 'skip':
                     yield value
-                elif self.process_contents == 'strict' and validation != 'skip':
+                elif self.process_contents == 'strict':
                     reason = "attribute %r not found." % name
                     yield self.validation_error(validation, reason, attribute, **kwargs)
             else:
                 for result in xsd_attribute.iter_decode(value, validation, **kwargs):
                     yield result
-        elif validation != 'skip':
-            reason = "attribute %r not allowed." % name
-            yield self.validation_error(validation, reason, attribute, **kwargs)
+
+        elif validation == 'skip':
+            yield value
+
+        elif self.process_contents == 'strict':
+            reason = "unavailable namespace {!r}".format(get_namespace(name))
+            yield self.validation_error(validation, reason, **kwargs)
 
     def iter_encode(self, attribute, validation='lax', **kwargs):
-        if self.process_contents == 'skip':
-            return
-
         name, value = attribute
         namespace = get_namespace(name)
-        if self.is_namespace_allowed(namespace):
-            self._load_namespace(namespace)
+
+        if not self.is_namespace_allowed(namespace):
+            if validation != 'skip':
+                reason = "attribute %r not allowed." % name
+                yield self.validation_error(validation, reason, attribute, **kwargs)
+
+        elif self.process_contents == 'skip':
+            return
+
+        elif self.maps.load_namespace(namespace):
             try:
                 xsd_attribute = self.maps.lookup_attribute(name)
             except LookupError:
-                if self.process_contents == 'strict' and validation != 'skip':
+                if validation == 'skip':
+                    yield unicode_type(value)
+                elif self.process_contents == 'strict':
                     reason = "attribute %r not found." % name
                     yield self.validation_error(validation, reason, attribute, **kwargs)
             else:
                 for result in xsd_attribute.iter_encode(value, validation, **kwargs):
                     yield result
-        elif validation != 'skip':
-            reason = "attribute %r not allowed." % name
-            yield self.validation_error(validation, reason, attribute, **kwargs)
+
+        elif validation == 'skip':
+            yield unicode_type(value)
+
+        elif self.process_contents == 'strict':
+            reason = "unavailable namespace {!r}".format(get_namespace(name))
+            yield self.validation_error(validation, reason, **kwargs)
 
 
 class Xsd11AnyElement(XsdAnyElement):

From 54060ba0df95eb7209dbb94dcf885bb7a85c5007 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 16 Oct 2019 21:14:15 +0200
Subject: [PATCH 05/34] Modify resources.fetch_schema_locations()

  - Can now return a location for another namespace if hints for
    the resource namespace are missing
---
 xmlschema/resources.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 1cf37c1..35478d4 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -169,12 +169,17 @@ def fetch_schema_locations(source, locations=None, **resource_options):
     base_url = resource.base_url
     namespace = resource.namespace
     locations = resource.get_locations(locations)
-    for ns, url in filter(lambda x: x[0] == namespace, locations):
+    if not locations:
+        msg = "the XML data resource {!r} does not contain any schema location hint."
+        raise XMLSchemaValueError(msg.format(source))
+
+    for ns, url in sorted(locations, key=lambda x: x[0] != namespace):
         try:
             return fetch_resource(url, base_url, timeout), locations
         except XMLSchemaURLError:
             pass
-    raise XMLSchemaValueError("not found a schema for XML data resource %r (namespace=%r)." % (source, namespace))
+
+    raise XMLSchemaValueError("not found a schema for XML data resource {!r}.".format(source))
 
 
 def fetch_schema(source, locations=None, **resource_options):

From 257ef230c4b4c42a3374d0a5bc620c68d579e0fd Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 17 Oct 2019 10:50:24 +0200
Subject: [PATCH 06/34] Update test_resources.py

---
 xmlschema/tests/test_resources.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index f5dbd5d..2519be2 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -14,6 +14,7 @@ This module runs tests concerning resources.
 """
 import unittest
 import os
+import platform
 
 try:
     from pathlib import PureWindowsPath, PurePath
@@ -22,9 +23,9 @@ except ImportError:
 
 from xmlschema import (
     fetch_namespaces, fetch_resource, normalize_url, fetch_schema, fetch_schema_locations,
-    load_xml_resource, XMLResource, XMLSchemaURLError
+    load_xml_resource, XMLResource, XMLSchemaURLError, XMLSchema
 )
-from xmlschema.tests import casepath
+from xmlschema.tests import SKIP_REMOTE_TESTS, casepath
 from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
 from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, \
     etree_element, py_etree_element
@@ -44,6 +45,7 @@ class TestResources(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
+        cls.schema_class = XMLSchema
         cls.vh_dir = casepath('examples/vehicles')
         cls.vh_xsd_file = casepath('examples/vehicles/vehicles.xsd')
         cls.vh_xml_file = casepath('examples/vehicles/vehicles.xml')

From 2ed02120026fe788f173680c269f904bbc88fc52 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Sat, 19 Oct 2019 00:04:28 +0200
Subject: [PATCH 07/34] Add option -n/--narrow to tests for skipping extra
 checks

---
 CHANGELOG.rst                                 |  6 +++
 xmlschema/tests/test_factory/arguments.py     |  4 +-
 xmlschema/tests/test_factory/factory.py       |  5 +-
 xmlschema/tests/test_factory/schema_tests.py  |  5 +-
 .../tests/test_factory/validation_tests.py    | 47 ++++++++++---------
 xmlschema/tests/test_resources.py             |  6 ++-
 xmlschema/validators/globals_.py              |  5 +-
 7 files changed, 49 insertions(+), 29 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 213513e..0af63c5 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,12 @@
 CHANGELOG
 *********
 
+`v1.0.16`_ (2019-10-XX)
+=======================
+* Improved XMLResource with zip files interface and lazy
+* Fix for validation with XSD wildcards and 'lax' process content
+* Fix for issue #1...
+
 `v1.0.15`_ (2019-10-13)
 =======================
 * Improved XPath 2.0 bindings
diff --git a/xmlschema/tests/test_factory/arguments.py b/xmlschema/tests/test_factory/arguments.py
index 95ff4c2..49326cd 100644
--- a/xmlschema/tests/test_factory/arguments.py
+++ b/xmlschema/tests/test_factory/arguments.py
@@ -21,6 +21,7 @@ import re
 import argparse
 
 TEST_FACTORY_OPTIONS = {
+    'narrow': '-n' in sys.argv or '--narrow' in sys.argv,         # Skip extra checks (eg. other converters)
     'extra_cases': '-x' in sys.argv or '--extra' in sys.argv,     # Include extra test cases
     'check_with_lxml': '-l' in sys.argv or '--lxml' in sys.argv,  # Check with lxml.etree.XMLSchema (for XSD 1.0)
 }
@@ -28,7 +29,8 @@ TEST_FACTORY_OPTIONS = {
 
 RUN_W3C_TEST_SUITE = '-w' in sys.argv or '--w3c' in sys.argv
 
-sys.argv = [a for a in sys.argv if a not in {'-x', '--extra', '-l', '--lxml'}]  # Clean sys.argv for unittest
+sys.argv = [a for a in sys.argv if a not in
+            {'-x', '--extra', '-l', '--lxml', '-n', '--narrow'}]  # Clean sys.argv for unittest
 
 
 def get_test_args(args_line):
diff --git a/xmlschema/tests/test_factory/factory.py b/xmlschema/tests/test_factory/factory.py
index 53e3700..08d62d4 100644
--- a/xmlschema/tests/test_factory/factory.py
+++ b/xmlschema/tests/test_factory/factory.py
@@ -38,6 +38,7 @@ def tests_factory(test_class_builder, suffix='xml'):
 
     test_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     testfiles = [os.path.join(test_dir, 'test_cases/testfiles')]
+    narrow = TEST_FACTORY_OPTIONS['narrow']
     if TEST_FACTORY_OPTIONS['extra_cases']:
         package_dir = os.path.dirname(os.path.dirname(test_dir))
         testfiles.extend(glob.glob(os.path.join(package_dir, 'test_cases/testfiles')))
@@ -94,7 +95,9 @@ def tests_factory(test_class_builder, suffix='xml'):
             schema_class = ObservedXMLSchema11 if test_args.inspect else XMLSchema11
             check_with_lxml = False
 
-        test_class = test_class_builder(test_file, test_args, test_num, schema_class, check_with_lxml)
+        test_class = test_class_builder(
+            test_file, test_args, test_num, schema_class, narrow, check_with_lxml
+        )
         test_classes[test_class.__name__] = test_class
         logger.debug("Add XSD %s test class %r.", test_args.version, test_class.__name__)
 
diff --git a/xmlschema/tests/test_factory/schema_tests.py b/xmlschema/tests/test_factory/schema_tests.py
index 6796ef3..5e3511a 100644
--- a/xmlschema/tests/test_factory/schema_tests.py
+++ b/xmlschema/tests/test_factory/schema_tests.py
@@ -27,7 +27,7 @@ from xmlschema.tests import XsdValidatorTestCase
 from .observers import SchemaObserver
 
 
-def make_schema_test_class(test_file, test_args, test_num, schema_class, check_with_lxml):
+def make_schema_test_class(test_file, test_args, test_num, schema_class, narrow, check_with_lxml):
     """
     Creates a schema test class.
 
@@ -35,6 +35,7 @@ def make_schema_test_class(test_file, test_args, test_num, schema_class, check_w
     :param test_args: line arguments for test case.
     :param test_num: a positive integer number associated with the test case.
     :param schema_class: the schema class to use.
+    :param narrow: skip extra checks (observed inspections).
     :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \
     Works only for XSD 1.0 tests.
     """
@@ -69,7 +70,7 @@ def make_schema_test_class(test_file, test_args, test_num, schema_class, check_w
                 xs = schema_class(xsd_file, locations=locations, defuse=defuse, loglevel=loglevel)
             self.errors.extend(xs.maps.all_errors)
 
-            if inspect:
+            if narrow and inspect:
                 components_ids = set([id(c) for c in xs.maps.iter_components()])
                 missing = [c for c in SchemaObserver.components if id(c) not in components_ids]
                 if any(c for c in missing):
diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py
index dfd2d50..651281e 100644
--- a/xmlschema/tests/test_factory/validation_tests.py
+++ b/xmlschema/tests/test_factory/validation_tests.py
@@ -47,7 +47,7 @@ def iter_nested_items(items, dict_class=dict, list_class=list):
         yield items
 
 
-def make_validator_test_class(test_file, test_args, test_num, schema_class, check_with_lxml):
+def make_validator_test_class(test_file, test_args, test_num, schema_class, narrow, check_with_lxml):
     """
     Creates a validator test class.
 
@@ -55,6 +55,7 @@ def make_validator_test_class(test_file, test_args, test_num, schema_class, chec
     :param test_args: line arguments for test case.
     :param test_num: a positive integer number associated with the test case.
     :param schema_class: the schema class to use.
+    :param narrow: skip other converters checks.
     :param check_with_lxml: if `True` compare with lxml XMLSchema class, reporting anomalies. \
     Works only for XSD 1.0 tests.
     """
@@ -239,19 +240,21 @@ def make_validator_test_class(test_file, test_args, test_num, schema_class, chec
             options = {'namespaces': namespaces, 'dict_class': ordered_dict_class}
 
             self.check_etree_encode(root, cdata_prefix='#', **options)  # Default converter
-            self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
-            self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
-            self.check_etree_encode(root, BadgerFishConverter, **options)
-            self.check_etree_encode(root, AbderaConverter, **options)
-            self.check_etree_encode(root, JsonMLConverter, **options)
+            if narrow:
+                self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
+                self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
+                self.check_etree_encode(root, BadgerFishConverter, **options)
+                self.check_etree_encode(root, AbderaConverter, **options)
+                self.check_etree_encode(root, JsonMLConverter, **options)
 
             options.pop('dict_class')
             self.check_json_serialization(root, cdata_prefix='#', **options)
-            self.check_json_serialization(root, ParkerConverter, validation='lax', **options)
-            self.check_json_serialization(root, ParkerConverter, validation='skip', **options)
-            self.check_json_serialization(root, BadgerFishConverter, **options)
-            self.check_json_serialization(root, AbderaConverter, **options)
-            self.check_json_serialization(root, JsonMLConverter, **options)
+            if narrow:
+                self.check_json_serialization(root, ParkerConverter, validation='lax', **options)
+                self.check_json_serialization(root, ParkerConverter, validation='skip', **options)
+                self.check_json_serialization(root, BadgerFishConverter, **options)
+                self.check_json_serialization(root, AbderaConverter, **options)
+                self.check_json_serialization(root, JsonMLConverter, **options)
 
         def check_decoding_and_encoding_with_lxml(self):
             xml_tree = lxml_etree.parse(xml_file)
@@ -280,19 +283,21 @@ def make_validator_test_class(test_file, test_args, test_num, schema_class, chec
                     'dict_class': ordered_dict_class,
                 }
                 self.check_etree_encode(root, cdata_prefix='#', **options)  # Default converter
-                self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
-                self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
-                self.check_etree_encode(root, BadgerFishConverter, **options)
-                self.check_etree_encode(root, AbderaConverter, **options)
-                self.check_etree_encode(root, JsonMLConverter, **options)
+                if narrow:
+                    self.check_etree_encode(root, ParkerConverter, validation='lax', **options)
+                    self.check_etree_encode(root, ParkerConverter, validation='skip', **options)
+                    self.check_etree_encode(root, BadgerFishConverter, **options)
+                    self.check_etree_encode(root, AbderaConverter, **options)
+                    self.check_etree_encode(root, JsonMLConverter, **options)
 
                 options.pop('dict_class')
                 self.check_json_serialization(root, cdata_prefix='#', **options)
-                self.check_json_serialization(root, ParkerConverter, validation='lax', **options)
-                self.check_json_serialization(root, ParkerConverter, validation='skip', **options)
-                self.check_json_serialization(root, BadgerFishConverter, **options)
-                self.check_json_serialization(root, AbderaConverter, **options)
-                self.check_json_serialization(root, JsonMLConverter, **options)
+                if narrow:
+                    self.check_json_serialization(root, ParkerConverter, validation='lax', **options)
+                    self.check_json_serialization(root, ParkerConverter, validation='skip', **options)
+                    self.check_json_serialization(root, BadgerFishConverter, **options)
+                    self.check_json_serialization(root, AbderaConverter, **options)
+                    self.check_json_serialization(root, JsonMLConverter, **options)
 
         def check_validate_and_is_valid_api(self):
             if expected_errors:
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index f5dbd5d..2519be2 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -14,6 +14,7 @@ This module runs tests concerning resources.
 """
 import unittest
 import os
+import platform
 
 try:
     from pathlib import PureWindowsPath, PurePath
@@ -22,9 +23,9 @@ except ImportError:
 
 from xmlschema import (
     fetch_namespaces, fetch_resource, normalize_url, fetch_schema, fetch_schema_locations,
-    load_xml_resource, XMLResource, XMLSchemaURLError
+    load_xml_resource, XMLResource, XMLSchemaURLError, XMLSchema
 )
-from xmlschema.tests import casepath
+from xmlschema.tests import SKIP_REMOTE_TESTS, casepath
 from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
 from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, \
     etree_element, py_etree_element
@@ -44,6 +45,7 @@ class TestResources(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
+        cls.schema_class = XMLSchema
         cls.vh_dir = casepath('examples/vehicles')
         cls.vh_xsd_file = casepath('examples/vehicles/vehicles.xsd')
         cls.vh_xml_file = casepath('examples/vehicles/vehicles.xml')
diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py
index c9716d7..c469b63 100644
--- a/xmlschema/validators/globals_.py
+++ b/xmlschema/validators/globals_.py
@@ -15,7 +15,7 @@ from __future__ import unicode_literals
 import warnings
 from collections import Counter
 
-from ..compat import string_base_type
+from ..compat import string_base_type, lru_cache
 from ..exceptions import XMLSchemaKeyError, XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaWarning
 from ..namespaces import XSD_NAMESPACE, LOCATION_HINTS, NamespaceResourcesMap
 from ..qnames import XSD_REDEFINE, XSD_OVERRIDE, XSD_NOTATION, XSD_ANY_TYPE, \
@@ -385,6 +385,7 @@ class XsdGlobals(XsdValidator):
             elif not any(schema.url == obj.url and schema.__class__ == obj.__class__ for obj in ns_schemas):
                 ns_schemas.append(schema)
 
+    @lru_cache(maxsize=1000)
     def load_namespace(self, namespace, build=True):
         """
         Load namespace from available location hints. Returns `True` if the namespace
@@ -471,7 +472,7 @@ class XsdGlobals(XsdValidator):
                 self.namespaces = namespaces
 
         else:
-            self.missing_locations.clear()
+            del self.missing_locations[:]
             for global_map in self.global_maps:
                 global_map.clear()
             self.substitution_groups.clear()

From 43322b6bc0c76d425a13c1028f2c2bfea3cbda7a Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Sat, 19 Oct 2019 00:08:09 +0200
Subject: [PATCH 08/34] Refactor XmlResource after merge

  - Remove _document and _fid (use the attribute source instead)
---
 xmlschema/resources.py            | 130 +++++++++++++++++++-----------
 xmlschema/tests/test_resources.py |   9 +--
 2 files changed, 85 insertions(+), 54 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 4fff367..be80107 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -250,7 +250,7 @@ class XMLResource(object):
         if base_url is not None and not isinstance(base_url, string_base_type):
             raise XMLSchemaValueError(u"'base_url' argument has to be a string: {!r}".format(base_url))
 
-        self._root = self._document = self._url = self._text = self._fid = None
+        self._root = self._text = self._url = None
         self._base_url = base_url
         self.defuse = defuse
         self.timeout = timeout
@@ -279,7 +279,7 @@ class XMLResource(object):
 
     def __setattr__(self, name, value):
         if name == 'source':
-            self._root, self._document, self._text, self._url, self._fid = self._fromsource(value)
+            self._root, self._text, self._url = self._fromsource(value)
         elif name == 'defuse' and value not in DEFUSE_MODES:
             raise XMLSchemaValueError(u"'defuse' attribute: {!r} is not a defuse mode.".format(value))
         elif name == 'timeout' and (not isinstance(value, int) or value <= 0):
@@ -289,47 +289,54 @@ class XMLResource(object):
         super(XMLResource, self).__setattr__(name, value)
 
     def _fromsource(self, source):
-        url, lazy = None, self._lazy
-        if hasattr(source, 'tag'):
+        url = None
+        if hasattr(source, 'tag') and hasattr(source, 'attrib'):
             self._lazy = False
-            return source, None, None, None, None  # Source is already an Element --> nothing to load
+            return source, None, None  # Source is already an Element --> nothing to load
+
         elif isinstance(source, string_base_type):
             _url, self._url = self._url, None
             try:
-                if lazy:
+                if self._lazy:
                     # check if source is a string containing a valid XML root
                     for _, root in self.iterparse(StringIO(source), events=('start',)):
-                        return root, None, source, None, None
+                        return root, source, None
                 else:
-                    return self.fromstring(source), None, source, None, None
+                    return self.fromstring(source), source, None
             except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError):
                 if '\n' in source:
                     raise
             finally:
                 self._url = _url
+
             url = normalize_url(source) if '\n' not in source else None
 
         elif isinstance(source, StringIO):
             _url, self._url = self._url, None
             try:
-                if lazy:
+                if self._lazy:
                     for _, root in self.iterparse(source, events=('start',)):
-                        return root, None, source.getvalue(), None, None
+                        return root, source.getvalue(), None
                 else:
-                    document = self.parse(source)
-                    return document.getroot(), document, source.getvalue(), None, None
+                    return self.parse(source).getroot(), source.getvalue(), None
             finally:
                 self._url = _url
 
         elif hasattr(source, 'read'):
+            try:
+                # Save remote urls for open new resources (non seekable)
+                if is_remote_url(source.url):
+                    url = source.url
+            except AttributeError:
+                pass
+
             _url, self._url = self._url, url
             try:
-                if lazy:
+                if self._lazy:
                     for _, root in self.iterparse(source, events=('start',)):
-                        return root, None, None, url, source
+                        return root, None, url
                 else:
-                    document = self.parse(source)
-                    return document.getroot(), document, None, url, source
+                    return self.parse(source).getroot(), None, url
             finally:
                 self._url = _url
 
@@ -342,7 +349,7 @@ class XMLResource(object):
             else:
                 if hasattr(root, 'tag'):
                     self._lazy = False
-                    return root, source, None, None, None
+                    return root, None, None
 
         if url is None:
             raise XMLSchemaTypeError(
@@ -353,13 +360,11 @@ class XMLResource(object):
             resource = urlopen(url, timeout=self.timeout)
             _url, self._url = self._url, url
             try:
-                if lazy:
+                if self._lazy:
                     for _, root in self.iterparse(resource, events=('start',)):
-                        return root, None, None, url, None
+                        return root, None, url
                 else:
-                    document = self.parse(resource)
-                    root = document.getroot()
-                    return root, document, None, url, None
+                    return self.parse(resource).getroot(), None, url
             finally:
                 self._url = _url
                 resource.close()
@@ -369,14 +374,6 @@ class XMLResource(object):
         """The XML tree root Element."""
         return self._root
 
-    @property
-    def document(self):
-        """
-        The ElementTree document, `None` if the instance is lazy or is not created
-        from another document or from an URL.
-        """
-        return self._document
-
     @property
     def text(self):
         """The XML text source, `None` if it's not available."""
@@ -392,9 +389,22 @@ class XMLResource(object):
         """The base URL for completing relative locations."""
         return os.path.dirname(self._url) if self._url else self._base_url
 
+    @property
+    def document(self):
+        """
+        The resource as ElementTree XML document. It's `None` if the instance
+        is lazy or if it's an lxml Element.
+        """
+        if isinstance(self.source, ElementTree.ElementTree):
+            return self.source
+        elif hasattr(self.source, 'getroot') and hasattr(self.source, 'parse'):
+            return self.source  # lxml's _ElementTree
+        elif not self._lazy and not hasattr(self.root, 'nsmap'):
+            return ElementTree.ElementTree(self.root)
+
     @property
     def namespace(self):
-        """The namespace of the XML document."""
+        """The namespace of the XML resource."""
         return get_namespace(self._root.tag) if self._root is not None else None
 
     @staticmethod
@@ -477,24 +487,48 @@ class XMLResource(object):
         return obj
 
     def open(self):
-        """Returns a opened resource reader object for the instance URL."""
-        if self._fid is not None:
-            self._fid.seek(0)
-            return self._fid
-
-        if self._url is None:
+        """
+        Returns a opened resource reader object for the instance URL. If the
+        source attribute is a seekable file-like object rewind the source and
+        return it.
+        """
+        if self.seek(0) == 0:
+            return self.source
+        elif self._url is None:
             raise XMLSchemaValueError("can't open, the resource has no URL associated.")
         try:
             return urlopen(self._url, timeout=self.timeout)
         except URLError as err:
             raise XMLSchemaURLError(reason="cannot access to resource %r: %s" % (self._url, err.reason))
 
+    def seek(self, position):
+        if not hasattr(self.source, 'read'):
+            return
+
+        try:
+            if not self.source.seekable():
+                return
+        except AttributeError:
+            pass
+        else:
+            return self.source.seek(position)
+
+        try:
+            return self.source.seek(position)
+        except AttributeError:
+            pass
+
+        try:
+            return self.source.fp.seek(position)
+        except AttributeError:
+            pass
+
     def load(self):
         """
         Loads the XML text from the data source. If the data source is an Element
         the source XML text can't be retrieved.
         """
-        if self._url is None and self._fid is None:
+        if self._url is None and not hasattr(self.source, 'read'):
             return  # Created from Element or text source --> already loaded
 
         resource = self.open()
@@ -506,7 +540,7 @@ class XMLResource(object):
             # We don't want to close the file obj if it wasn't originally
             # opened by `XMLResource`. That is the concern of the code
             # where the file obj came from.
-            if self._fid is None:
+            if resource is not self.source:
                 resource.close()
 
         if isinstance(data, bytes):
@@ -537,9 +571,8 @@ class XMLResource(object):
             for elem in self._root.iter(tag):
                 yield elem
             return
-        elif self._fid is not None:
-            self._fid.seek(0)
-            resource = self._fid
+        elif self.seek(0) == 0:
+            resource = self.source
         elif self._url is not None:
             resource = urlopen(self._url, timeout=self.timeout)
         else:
@@ -551,7 +584,7 @@ class XMLResource(object):
                     yield elem
                 elem.clear()
         finally:
-            if self._fid is None:
+            if resource is not self.source:
                 resource.close()
 
     def iterfind(self, path=None, namespaces=None):
@@ -563,9 +596,8 @@ class XMLResource(object):
                 for e in iter_select(self._root, path, namespaces, strict=False):
                     yield e
             return
-        elif self._fid is not None:
-            self._fid.seek(0)
-            resource = self._fid
+        elif self.seek(0) == 0:
+            resource = self.source
         elif self._url is not None:
             resource = urlopen(self._url, timeout=self.timeout)
         else:
@@ -603,7 +635,7 @@ class XMLResource(object):
                         elif level == 0:
                             elem.clear()
         finally:
-            if self._fid is None:
+            if self.source is not resource:
                 resource.close()
 
     def iter_location_hints(self):
@@ -656,7 +688,7 @@ class XMLResource(object):
         local_root = self.root.tag[0] != '{'
         nsmap = {}
 
-        if self._url is not None or self._fid is not None:
+        if self._url is not None or hasattr(self.source, 'read'):
             resource = self.open()
             try:
                 for event, node in self.iterparse(resource, events=('start-ns', 'end')):
@@ -670,7 +702,7 @@ class XMLResource(object):
                 # We don't want to close the file obj if it wasn't
                 # originally opened by `XMLResource`. That is the concern
                 # of the code where the file obj came from.
-                if self._fid is None:
+                if self.source is not resource:
                     resource.close()
         elif isinstance(self._text, string_base_type):
             try:
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 2519be2..5acf4fa 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -179,7 +179,7 @@ class TestResources(unittest.TestCase):
 
         resource = XMLResource(vh_root)
         self.assertEqual(resource.source, vh_root)
-        self.assertIsNone(resource.document)
+        self.assertIsInstance(resource.document, ElementTree.ElementTree)
         self.assertEqual(resource.root.tag, '{http://example.com/vehicles}vehicles')
         self.assertIsNone(resource.url)
         self.assertIsNone(resource.text)
@@ -436,12 +436,11 @@ class TestResources(unittest.TestCase):
                 except (KeyError, AttributeError):
                     return getattr(self.__dict__["_fid"], attr)
 
-        fake_name = "not__on____disk.xml"
-        with open(self.vh_xml_file) as schema_file:
-            resource = XMLResource(FileProxy(schema_file, fake_name))
+        with open(self.vh_xml_file) as xml_file:
+            resource = XMLResource(FileProxy(xml_file, fake_name="not__on____disk.xml"))
             self.assertIsNone(resource.url)
             self.assertEqual(set(resource.get_namespaces().keys()), {'vh', 'xsi'})
-            self.assertFalse(schema_file.closed)
+            self.assertFalse(xml_file.closed)
 
             
 if __name__ == '__main__':

From 8dd5d193ba699b23bfc491bc70fe5cb3a0e0d85a Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Sat, 19 Oct 2019 19:31:43 +0200
Subject: [PATCH 09/34] Update XML resource iterfind() to fix issues #102 and
 #112

  - Speed up iterfind() by admitting only simple paths and checking
    only the elements that match the path level
  - Avoid selection for * paths (about 35% faster)
  - Add close() method to XMLResource
---
 xmlschema/resources.py            |  71 ++++++++++++---
 xmlschema/tests/test_resources.py | 147 ++++++++++++++++++++++++++++--
 2 files changed, 200 insertions(+), 18 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index be80107..6fef39c 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -11,7 +11,7 @@
 import os.path
 import re
 import codecs
-from elementpath import iter_select, Selector
+from elementpath import iter_select, Selector, XPath1Parser
 
 from .compat import (
     PY3, StringIO, BytesIO, string_base_type, urlopen, urlsplit, urljoin, urlunsplit,
@@ -26,8 +26,23 @@ from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring
 DEFUSE_MODES = ('always', 'remote', 'never')
 
 
+XML_RESOURCE_XPATH_SYMBOLS = {
+    'position', 'last', 'not', 'and', 'or', '!=', '<=', '>=', '(', ')', 'text',
+    '[', ']', '.', ',', '/', '|', '*', '=', '<', '>', ':', '(end)', '(name)',
+    '(string)', '(float)', '(decimal)', '(integer)'
+}
+
+
+class XmlResourceXPathParser(XPath1Parser):
+    symbol_table = {k: v for k, v in XPath1Parser.symbol_table.items() if k in XML_RESOURCE_XPATH_SYMBOLS}
+    SYMBOLS = XML_RESOURCE_XPATH_SYMBOLS
+
+
+XmlResourceXPathParser.build_tokenizer()
+
+
 def is_remote_url(url):
-    return url is not None and urlsplit(url).scheme not in ('', 'file')
+    return isinstance(url, string_base_type) and urlsplit(url).scheme not in ('', 'file')
 
 
 def url_path_is_directory(url):
@@ -424,14 +439,23 @@ class XMLResource(object):
 
     def parse(self, source):
         """
-        An equivalent of *ElementTree.parse()* that can protect from XML entities attacks. When
-        protection is applied XML data are loaded and defused before building the ElementTree instance.
+        An equivalent of *ElementTree.parse()* that can protect from XML entities attacks.
+        When protection is applied XML data are loaded and defused before building the
+        ElementTree instance. The protection applied is based on value of *defuse*
+        attribute and *base_url* property.
 
         :param source: a filename or file object containing XML data.
         :returns: an ElementTree instance.
         """
-        if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self._url):
-            text = source.read()
+        if self.defuse == 'always' or self.defuse == 'remote' and \
+                hasattr(source, 'read') and is_remote_url(self.base_url):
+
+            if hasattr(source, 'read'):
+                text = source.read()
+            else:
+                with open(source) as f:
+                    text = f.read()
+
             if isinstance(text, bytes):
                 self.defusing(BytesIO(text))
                 return ElementTree.parse(BytesIO(text))
@@ -445,11 +469,14 @@ class XMLResource(object):
         """
         An equivalent of *ElementTree.iterparse()* that can protect from XML entities attacks.
         When protection is applied the iterator yields pure-Python Element instances.
+        The protection applied is based on resource *defuse* attribute and *base_url* property.
 
         :param source: a filename or file object containing XML data.
         :param events: a list of events to report back. If omitted, only “end” events are reported.
         """
-        if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self._url):
+        if self.defuse == 'always' or self.defuse == 'remote' and \
+                hasattr(source, 'read') and is_remote_url(self.base_url):
+
             parser = SafeXMLParser(target=PyElementTree.TreeBuilder())
             try:
                 return PyElementTree.iterparse(source, events, parser)
@@ -461,17 +488,20 @@ class XMLResource(object):
     def fromstring(self, text):
         """
         An equivalent of *ElementTree.fromstring()* that can protect from XML entities attacks.
+        The protection applied is based on resource *defuse* attribute and *base_url* property.
 
         :param text: a string containing XML data.
         :returns: the root Element instance.
         """
-        if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self._url):
+        if self.defuse == 'always' or self.defuse == 'remote' and is_remote_url(self.base_url):
             self.defusing(StringIO(text))
         return ElementTree.fromstring(text)
 
     def tostring(self, indent='', max_lines=None, spaces_for_tab=4, xml_declaration=False):
         """Generates a string representation of the XML resource."""
-        return etree_tostring(self._root, self.get_namespaces(), indent, max_lines, spaces_for_tab, xml_declaration)
+        elem = self._root
+        namespaces = self.get_namespaces()
+        return etree_tostring(elem, namespaces, indent, max_lines, spaces_for_tab, xml_declaration)
 
     def copy(self, **kwargs):
         """Resource copy method. Change init parameters with keyword arguments."""
@@ -502,6 +532,10 @@ class XMLResource(object):
             raise XMLSchemaURLError(reason="cannot access to resource %r: %s" % (self._url, err.reason))
 
     def seek(self, position):
+        """
+        Change stream position if the XML resource was created with a seekable
+        file-like object. In the other cases this method has no effect.
+        """
         if not hasattr(self.source, 'read'):
             return
 
@@ -523,6 +557,16 @@ class XMLResource(object):
         except AttributeError:
             pass
 
+    def close(self):
+        """
+        Close the XML resource if it's created with a file-like object.
+        In other cases this method has no effect.
+        """
+        try:
+            self.source.close()
+        except (AttributeError, TypeError):
+            pass
+
     def load(self):
         """
         Loads the XML text from the data source. If the data source is an Element
@@ -619,7 +663,11 @@ class XMLResource(object):
                             yield elem
                             elem.clear()
             else:
-                selector = Selector(path, namespaces, strict=False)
+                selector = Selector(path, namespaces, strict=False, parser=XmlResourceXPathParser)
+                path.replace(' ', '').replace('./', '')
+                path_level = path.count('/') + 1
+                select_all = '*' in path and set(path).issubset({'*', '/'})
+
                 level = 0
                 for event, elem in self.iterparse(resource, events=('start', 'end')):
                     if event == "start":
@@ -629,7 +677,8 @@ class XMLResource(object):
                         level += 1
                     else:
                         level -= 1
-                        if elem in selector.select(self._root):
+                        if level == path_level and \
+                                (select_all or elem in selector.select(self._root)):
                             yield elem
                             elem.clear()
                         elif level == 0:
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 5acf4fa..e668315 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -13,12 +13,14 @@
 This module runs tests concerning resources.
 """
 import unittest
+import time
 import os
 import platform
 
 try:
     from pathlib import PureWindowsPath, PurePath
 except ImportError:
+    # noinspection PyPackageRequirements
     from pathlib2 import PureWindowsPath, PurePath
 
 from xmlschema import (
@@ -29,6 +31,7 @@ from xmlschema.tests import SKIP_REMOTE_TESTS, casepath
 from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
 from xmlschema.etree import ElementTree, PyElementTree, lxml_etree, \
     etree_element, py_etree_element
+from xmlschema.namespaces import XSD_NAMESPACE
 from xmlschema.helpers import is_etree_element
 
 
@@ -344,14 +347,36 @@ class TestResources(unittest.TestCase):
         resource.load()
         self.assertTrue(resource.is_loaded())
 
-    def test_xml_resource_open(self):
+    def test_xml_resource_parse(self):
         resource = XMLResource(self.vh_xml_file)
-        xml_file = resource.open()
-        data = xml_file.read().decode('utf-8')
-        self.assertTrue(data.startswith('<?xml '))
-        xml_file.close()
-        resource = XMLResource('<A/>')
-        self.assertRaises(ValueError, resource.open)
+
+        self.assertEqual(resource.defuse, 'remote')
+        xml_document = resource.parse(self.col_xml_file)
+        self.assertTrue(is_etree_element(xml_document.getroot()))
+
+        resource.defuse = 'always'
+        xml_document = resource.parse(self.col_xml_file)
+        self.assertTrue(is_etree_element(xml_document.getroot()))
+
+    def test_xml_resource_iterparse(self):
+        resource = XMLResource(self.vh_xml_file)
+
+        self.assertEqual(resource.defuse, 'remote')
+        for _, elem in resource.iterparse(self.col_xml_file, events=('end',)):
+            self.assertTrue(is_etree_element(elem))
+
+        resource.defuse = 'always'
+        for _, elem in resource.iterparse(self.col_xml_file, events=('end',)):
+            self.assertTrue(is_etree_element(elem))
+
+    def test_xml_resource_fromstring(self):
+        resource = XMLResource(self.vh_xml_file)
+
+        self.assertEqual(resource.defuse, 'remote')
+        self.assertEqual(resource.fromstring('<root/>').tag, 'root')
+
+        resource.defuse = 'always'
+        self.assertEqual(resource.fromstring('<root/>').tag, 'root')
 
     def test_xml_resource_tostring(self):
         resource = XMLResource(self.vh_xml_file)
@@ -373,6 +398,114 @@ class TestResources(unittest.TestCase):
         resource2 = resource.copy()
         self.assertEqual(resource.text, resource2.text)
 
+    def test_xml_resource_open(self):
+        resource = XMLResource(self.vh_xml_file)
+        xml_file = resource.open()
+        self.assertIsNot(xml_file, resource.source)
+        data = xml_file.read().decode('utf-8')
+        self.assertTrue(data.startswith('<?xml '))
+        xml_file.close()
+        resource = XMLResource('<A/>')
+        self.assertRaises(ValueError, resource.open)
+
+        resource = XMLResource(source=open(self.vh_xml_file))
+        xml_file = resource.open()
+        self.assertIs(xml_file, resource.source)
+        xml_file.close()
+
+    def test_xml_resource_seek(self):
+        resource = XMLResource(self.vh_xml_file)
+        self.assertIsNone(resource.seek(0))
+        self.assertIsNone(resource.seek(1))
+        xml_file = open(self.vh_xml_file)
+        resource = XMLResource(source=xml_file)
+        self.assertEqual(resource.seek(0), 0)
+        self.assertEqual(resource.seek(1), 1)
+        xml_file.close()
+
+    def test_xml_resource_close(self):
+        resource = XMLResource(self.vh_xml_file)
+        resource.close()
+        xml_file = resource.open()
+        self.assertTrue(callable(xml_file.read))
+
+        xml_file = open(self.vh_xml_file)
+        resource = XMLResource(source=xml_file)
+        resource.close()
+        with self.assertRaises(ValueError):
+            resource.open()
+
+    def test_xml_resource_iter(self):
+        for lazy in (False, True):
+            resource = XMLResource(self.schema_class.meta_schema.source.url, lazy=lazy)
+            k = 0
+            for k, _ in enumerate(resource.iter()):
+                pass
+            self.assertEqual(k, 1389)
+
+            k = 0
+            for k, _ in enumerate(resource.iter('{%s}complexType' % XSD_NAMESPACE)):
+                pass
+            self.assertEqual(k, 55)
+
+    def test_xml_resource_iterfind(self):
+        resource = XMLResource(self.schema_class.meta_schema.source.url, lazy=False)
+        self.assertFalse(resource.is_lazy())
+
+        start_time = time.time()
+        for _ in range(10):
+            for _ in resource.iterfind():
+                pass
+        t1 = time.time() - start_time
+
+        start_time = time.time()
+        for _ in range(10):
+            for _ in resource.iterfind(path='.'):
+                pass
+        t2 = time.time() - start_time
+        self.assertLessEqual(t1, t2 / 30.0)
+        self.assertGreaterEqual(t1, t2 / 100.0)
+
+        start_time = time.time()
+        counter = 0
+        for _ in resource.iterfind(path='*'):
+            counter += 1
+        t3 = time.time() - start_time
+        self.assertGreaterEqual(t2, t3 / counter * 10)
+
+        resource = XMLResource(self.schema_class.meta_schema.source.url)
+        self.assertTrue(resource.is_lazy())
+
+        start_time = time.time()
+        for _ in range(10):
+            for _ in resource.iterfind():
+                pass
+        tl1 = time.time() - start_time
+        self.assertLessEqual(t1, tl1 / 1000.0)
+        self.assertGreaterEqual(t1, tl1 / 10000.0)
+
+        start_time = time.time()
+        for _ in range(10):
+            for _ in resource.iterfind(path='.'):
+                pass
+        tl2 = time.time() - start_time
+
+        self.assertLessEqual(t2, tl2 / 80.0)
+        self.assertGreaterEqual(t2, tl2 / 1000.0)
+
+        start_time = time.time()
+        counter3 = 0
+        for _ in resource.iterfind(path='*'):
+            counter3 += 1
+        tl3 = time.time() - start_time
+        self.assertGreaterEqual(tl2, tl3 / counter3 * 10)
+
+        start_time = time.time()
+        for _ in resource.iterfind(path='. /. / xs:complexType', namespaces={'xs': XSD_NAMESPACE}):
+            pass
+        tl4 = time.time() - start_time
+        self.assertTrue(0.7 < (tl3 / tl4) < 1)
+
     def test_xml_resource_get_namespaces(self):
         with open(self.vh_xml_file) as schema_file:
             resource = XMLResource(schema_file)

From 8db83477dfaa03c352abef859051e4976dd4b2d4 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Tue, 22 Oct 2019 14:28:46 +0200
Subject: [PATCH 10/34] Extend check_memory.py script

  - Add an argument to repeat test N times
  - Add matplotlib to dev requirements
---
 requirements-dev.txt            |  1 +
 xmlschema/tests/check_memory.py | 73 ++++++++++++++++++---------------
 2 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 83dfcbd..7ae0c91 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,6 +5,7 @@ coverage
 elementpath~=1.3.0
 lxml
 memory_profiler
+matplotlib
 pathlib2  # For Py27 tests on resources
 Sphinx
 sphinx_rtd_theme
diff --git a/xmlschema/tests/check_memory.py b/xmlschema/tests/check_memory.py
index 4a0c936..606c0dd 100755
--- a/xmlschema/tests/check_memory.py
+++ b/xmlschema/tests/check_memory.py
@@ -28,7 +28,7 @@ def test_choice_type(value):
 
 
 parser = argparse.ArgumentParser(add_help=True)
-parser.usage = """%(prog)s TEST_NUM [XML_FILE]
+parser.usage = """%(prog)s TEST_NUM [XML_FILE [REPEAT]]
 
 Run memory tests:
   1) Package import or schema build
@@ -44,6 +44,7 @@ Run memory tests:
 
 parser.add_argument('test_num', metavar="TEST_NUM", type=test_choice_type, help="Test number to run")
 parser.add_argument('xml_file', metavar='XML_FILE', nargs='?', help='Input XML file')
+parser.add_argument('repeat', metavar='REPEAT', nargs='?', type=int, default=1, help='Repeat operation N times')
 args = parser.parse_args()
 
 
@@ -68,54 +69,62 @@ def build_schema(source):
 
 
 @profile
-def etree_parse(source):
+def etree_parse(source, repeat=1):
     xt = ElementTree.parse(source)
-    for _ in xt.iter():
-        pass
-
-
-@profile
-def etree_full_iterparse(source):
-    context = ElementTree.iterparse(source, events=('start', 'end'))
-    for event, elem in context:
-        if event == 'start':
+    for _ in range(repeat):
+        for _ in xt.iter():
             pass
 
 
 @profile
-def etree_emptied_iterparse(source):
-    context = ElementTree.iterparse(source, events=('start', 'end'))
-    for event, elem in context:
-        if event == 'end':
-            elem.clear()
+def etree_full_iterparse(source, repeat=1):
+    for _ in range(repeat):
+        context = ElementTree.iterparse(source, events=('start', 'end'))
+        for event, elem in context:
+            if event == 'start':
+                pass
 
 
 @profile
-def decode(source):
+def etree_emptied_iterparse(source, repeat=1):
+    for _ in range(repeat):
+        context = ElementTree.iterparse(source, events=('start', 'end'))
+        for event, elem in context:
+            if event == 'end':
+                elem.clear()
+
+
+@profile
+def decode(source, repeat=1):
     decoder = xmlschema.XMLSchema.meta_schema if source.endswith('.xsd') else xmlschema
-    return decoder.to_dict(source)
+    for _ in range(repeat):
+        decoder.to_dict(source)
 
 
 @profile
-def lazy_decode(source):
+def lazy_decode(source, repeat=1):
     decoder = xmlschema.XMLSchema.meta_schema if source.endswith('.xsd') else xmlschema
-    for result in decoder.to_dict(xmlschema.XMLResource(source, lazy=True), path='*'):
-        del result
+    for _ in range(repeat):
+        for result in decoder.to_dict(xmlschema.XMLResource(source, lazy=True), path='*'):
+            del result
 
 
 @profile
-def validate(source):
+def validate(source, repeat=1):
     validator = xmlschema.XMLSchema.meta_schema if source.endswith('.xsd') else xmlschema
-    return validator.validate(source)
+    for _ in range(repeat):
+        validator.validate(source)
 
 
 @profile
-def lazy_validate(source):
+def lazy_validate(source, repeat=1):
     if source.endswith('.xsd'):
         validator, path = xmlschema.XMLSchema.meta_schema, '*'
     else:
         validator, path = xmlschema, None
-    return validator.validate(xmlschema.XMLResource(source, lazy=True), path=path)
+
+    for _ in range(repeat):
+        validator.validate(xmlschema.XMLResource(source, lazy=True), path=path)
 
 
 if __name__ == '__main__':
@@ -127,26 +136,26 @@ if __name__ == '__main__':
             build_schema(args.xml_file)
     elif args.test_num == 2:
         import xml.etree.ElementTree as ElementTree
-        etree_parse(args.xml_file)
+        etree_parse(args.xml_file, args.repeat)
     elif args.test_num == 3:
         import xml.etree.ElementTree as ElementTree
-        etree_full_iterparse(args.xml_file)
+        etree_full_iterparse(args.xml_file, args.repeat)
     elif args.test_num == 4:
         import xml.etree.ElementTree as ElementTree
-        etree_emptied_iterparse(args.xml_file)
+        etree_emptied_iterparse(args.xml_file, args.repeat)
     elif args.test_num == 5:
         import xmlschema
         xmlschema.XMLSchema.meta_schema.build()
-        decode(args.xml_file)
+        decode(args.xml_file, args.repeat)
     elif args.test_num == 6:
         import xmlschema
         xmlschema.XMLSchema.meta_schema.build()
-        lazy_decode(args.xml_file)
+        lazy_decode(args.xml_file, args.repeat)
     elif args.test_num == 7:
         import xmlschema
         xmlschema.XMLSchema.meta_schema.build()
-        validate(args.xml_file)
+        validate(args.xml_file, args.repeat)
     elif args.test_num == 8:
         import xmlschema
         xmlschema.XMLSchema.meta_schema.build()
-        lazy_validate(args.xml_file)
+        lazy_validate(args.xml_file, args.repeat)

From c075ff22e546c02c9c80e7c0215814e2ec380edf Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Tue, 22 Oct 2019 18:37:26 +0200
Subject: [PATCH 11/34] Complete the revision of resource module

  - normalize_url() now processes file names containing '#' chars
  - Fix iterfind() of lazy resource
  - Add more tests for XML resources
---
 xmlschema/resources.py                        |  28 +++--
 .../test_cases/resources/dummy file #2.txt    |   1 +
 xmlschema/tests/test_resources.py             | 113 +++++++++---------
 xmlschema/validators/schemas/puppet.xsd       |  32 -----
 4 files changed, 71 insertions(+), 103 deletions(-)
 create mode 100644 xmlschema/tests/test_cases/resources/dummy file #2.txt
 delete mode 100644 xmlschema/validators/schemas/puppet.xsd

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 6fef39c..94832fb 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -66,14 +66,21 @@ def normalize_url(url, base_url=None, keep_relative=False):
     conformant to URL format specification.
     :return: A normalized URL.
     """
-    def add_trailing_slash(r):
-        return urlunsplit((r[0], r[1], r[2] + '/' if r[2] and r[2][-1] != '/' else r[2], r[3], r[4]))
+    def add_trailing_slash(x):
+        return urlunsplit((x[0], x[1], x[2] + '/' if x[2] and x[2][-1] != '/' else x[2], x[3], x[4]))
+
+    def filter_url(x):
+        x = x.strip().replace('\\', '/')
+        while x.startswith('//'):
+            x = x.replace('//', '/', 1)
+        if not urlsplit(x).scheme:
+            x = x.replace('#', '%23')
+        return x
+
+    url = filter_url(url)
 
     if base_url is not None:
-        base_url = base_url.replace('\\', '/')
-        while base_url.startswith('//'):
-            base_url = base_url.replace('//', '/', 1)
-
+        base_url = filter_url(base_url)
         base_url_parts = urlsplit(base_url)
         base_url = add_trailing_slash(base_url_parts)
         if base_url_parts.scheme not in uses_relative:
@@ -102,10 +109,6 @@ def normalize_url(url, base_url=None, keep_relative=False):
                 if base_url_parts.netloc and not url.startswith(base_url_parts.netloc) and url.startswith('//'):
                     url = 'file:' + url
 
-    url = url.replace('\\', '/')
-    while url.startswith('//'):
-        url = url.replace('//', '/', 1)
-
     url_parts = urlsplit(url, scheme='file')
     if url_parts.scheme not in uses_relative:
         return 'file:///{}'.format(url_parts.geturl())  # Eg. k:/Python/lib/....
@@ -622,6 +625,7 @@ class XMLResource(object):
         else:
             resource = StringIO(self._text)
 
+        # Note: lazy iteration change the order (top level element is the last)
         try:
             for event, elem in self.iterparse(resource, events=('end',)):
                 if tag is None or elem.tag == tag:
@@ -664,8 +668,8 @@ class XMLResource(object):
                             elem.clear()
             else:
                 selector = Selector(path, namespaces, strict=False, parser=XmlResourceXPathParser)
-                path.replace(' ', '').replace('./', '')
-                path_level = path.count('/') + 1
+                path = path.replace(' ', '').replace('./', '')
+                path_level = path.count('/') + 1 if path != '.' else 0
                 select_all = '*' in path and set(path).issubset({'*', '/'})
 
                 level = 0
diff --git a/xmlschema/tests/test_cases/resources/dummy file #2.txt b/xmlschema/tests/test_cases/resources/dummy file #2.txt
new file mode 100644
index 0000000..a9e6024
--- /dev/null
+++ b/xmlschema/tests/test_cases/resources/dummy file #2.txt	
@@ -0,0 +1 @@
+DUMMY CONTENT
\ No newline at end of file
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index e668315..38f94a4 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -13,7 +13,6 @@
 This module runs tests concerning resources.
 """
 import unittest
-import time
 import os
 import platform
 
@@ -123,12 +122,25 @@ class TestResources(unittest.TestCase):
         self.assertEqual(normalize_url('dir2/schema.xsd', '//root/dir1'), 'file:///root/dir1/dir2/schema.xsd')
         self.assertEqual(normalize_url('dir2/schema.xsd', '////root/dir1'), 'file:///root/dir1/dir2/schema.xsd')
 
+        self.check_url(normalize_url('issue #000.xml', 'file://host/home/'),
+                       'file://host/home/issue %23000.xml')
+        self.check_url(normalize_url('data.xml', 'file://host/home/issue 000'),
+                       'file://host/home/issue 000/data.xml')
+        self.check_url(normalize_url('data.xml', '/host/home/issue #000'),
+                       '/host/home/issue %23000/data.xml')
+
     def test_fetch_resource(self):
         wrong_path = casepath('resources/dummy_file.txt')
         self.assertRaises(XMLSchemaURLError, fetch_resource, wrong_path)
         right_path = casepath('resources/dummy file.txt')
         self.assertTrue(fetch_resource(right_path).endswith('dummy file.txt'))
 
+        ambiguous_path = casepath('resources/dummy file #2.txt')
+        self.assertTrue(fetch_resource(ambiguous_path).endswith('dummy file %232.txt'))
+
+        with urlopen(fetch_resource(ambiguous_path)) as res:
+            self.assertEqual(res.read(), b'DUMMY CONTENT')
+
     def test_fetch_namespaces(self):
         self.assertFalse(fetch_namespaces(casepath('resources/malformed.xml')))
 
@@ -436,75 +448,58 @@ class TestResources(unittest.TestCase):
             resource.open()
 
     def test_xml_resource_iter(self):
-        for lazy in (False, True):
-            resource = XMLResource(self.schema_class.meta_schema.source.url, lazy=lazy)
-            k = 0
-            for k, _ in enumerate(resource.iter()):
-                pass
-            self.assertEqual(k, 1389)
-
-            k = 0
-            for k, _ in enumerate(resource.iter('{%s}complexType' % XSD_NAMESPACE)):
-                pass
-            self.assertEqual(k, 55)
-
-    def test_xml_resource_iterfind(self):
         resource = XMLResource(self.schema_class.meta_schema.source.url, lazy=False)
         self.assertFalse(resource.is_lazy())
+        lazy_resource = XMLResource(self.schema_class.meta_schema.source.url)
+        self.assertTrue(lazy_resource.is_lazy())
 
-        start_time = time.time()
-        for _ in range(10):
-            for _ in resource.iterfind():
-                pass
-        t1 = time.time() - start_time
+        tags = [x.tag for x in resource.iter()]
+        self.assertEqual(len(tags), 1390)
+        self.assertEqual(tags[0], '{%s}schema' % XSD_NAMESPACE)
 
-        start_time = time.time()
-        for _ in range(10):
-            for _ in resource.iterfind(path='.'):
-                pass
-        t2 = time.time() - start_time
-        self.assertLessEqual(t1, t2 / 30.0)
-        self.assertGreaterEqual(t1, t2 / 100.0)
+        lazy_tags = [x.tag for x in lazy_resource.iter()]
+        self.assertEqual(len(lazy_tags), 1390)
+        self.assertEqual(lazy_tags[-1], '{%s}schema' % XSD_NAMESPACE)
+        self.assertNotEqual(tags, lazy_tags)
 
-        start_time = time.time()
-        counter = 0
-        for _ in resource.iterfind(path='*'):
-            counter += 1
-        t3 = time.time() - start_time
-        self.assertGreaterEqual(t2, t3 / counter * 10)
+        tags = [x.tag for x in resource.iter('{%s}complexType' % XSD_NAMESPACE)]
+        self.assertEqual(len(tags), 56)
+        self.assertEqual(tags[0], '{%s}complexType' % XSD_NAMESPACE)
+        self.assertListEqual(tags, [x.tag for x in lazy_resource.iter('{%s}complexType' % XSD_NAMESPACE)])
 
-        resource = XMLResource(self.schema_class.meta_schema.source.url)
-        self.assertTrue(resource.is_lazy())
+    def test_xml_resource_iterfind(self):
+        namespaces = {'xs': XSD_NAMESPACE}
+        resource = XMLResource(self.schema_class.meta_schema.source.url, lazy=False)
+        self.assertFalse(resource.is_lazy())
+        lazy_resource = XMLResource(self.schema_class.meta_schema.source.url)
+        self.assertTrue(lazy_resource.is_lazy())
 
-        start_time = time.time()
-        for _ in range(10):
-            for _ in resource.iterfind():
-                pass
-        tl1 = time.time() - start_time
-        self.assertLessEqual(t1, tl1 / 1000.0)
-        self.assertGreaterEqual(t1, tl1 / 10000.0)
+        # Note: Element change with lazy resource so compare only tags
 
-        start_time = time.time()
-        for _ in range(10):
-            for _ in resource.iterfind(path='.'):
-                pass
-        tl2 = time.time() - start_time
+        tags = [x.tag for x in resource.iterfind()]
+        self.assertEqual(len(tags), 1)
+        self.assertEqual(tags[0], '{%s}schema' % XSD_NAMESPACE)
+        self.assertListEqual(tags, [x.tag for x in lazy_resource.iterfind()])
 
-        self.assertLessEqual(t2, tl2 / 80.0)
-        self.assertGreaterEqual(t2, tl2 / 1000.0)
+        tags = [x.tag for x in resource.iterfind(path='.')]
+        self.assertEqual(len(tags), 1)
+        self.assertEqual(tags[0], '{%s}schema' % XSD_NAMESPACE)
+        self.assertListEqual(tags, [x.tag for x in lazy_resource.iterfind(path='.')])
 
-        start_time = time.time()
-        counter3 = 0
-        for _ in resource.iterfind(path='*'):
-            counter3 += 1
-        tl3 = time.time() - start_time
-        self.assertGreaterEqual(tl2, tl3 / counter3 * 10)
+        tags = [x.tag for x in resource.iterfind(path='*')]
+        self.assertEqual(len(tags), 156)
+        self.assertEqual(tags[0], '{%s}annotation' % XSD_NAMESPACE)
+        self.assertListEqual(tags, [x.tag for x in lazy_resource.iterfind(path='*')])
 
-        start_time = time.time()
-        for _ in resource.iterfind(path='. /. / xs:complexType', namespaces={'xs': XSD_NAMESPACE}):
-            pass
-        tl4 = time.time() - start_time
-        self.assertTrue(0.7 < (tl3 / tl4) < 1)
+        tags = [x.tag for x in resource.iterfind('xs:complexType', namespaces)]
+        self.assertEqual(len(tags), 35)
+        self.assertTrue(all(t == '{%s}complexType' % XSD_NAMESPACE for t in tags))
+        self.assertListEqual(tags, [x.tag for x in lazy_resource.iterfind('xs:complexType', namespaces)])
+
+        tags = [x.tag for x in resource.iterfind('. /. / xs:complexType', namespaces)]
+        self.assertEqual(len(tags), 35)
+        self.assertTrue(all(t == '{%s}complexType' % XSD_NAMESPACE for t in tags))
+        self.assertListEqual(tags, [x.tag for x in lazy_resource.iterfind('. /. / xs:complexType', namespaces)])
 
     def test_xml_resource_get_namespaces(self):
         with open(self.vh_xml_file) as schema_file:
diff --git a/xmlschema/validators/schemas/puppet.xsd b/xmlschema/validators/schemas/puppet.xsd
deleted file mode 100644
index 4434ff4..0000000
--- a/xmlschema/validators/schemas/puppet.xsd
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-<schema xmlns="http://www.w3.org/2001/XMLSchema">
-    <annotation>
-        <documentation>
-            A schema with puppet types for creating substitute elements.
-        </documentation>
-    </annotation>
-
-    <simpleType name="simple_puppet">
-        <union>
-            <simpleType>
-                <list itemType="float"/>
-            </simpleType>
-            <simpleType>
-                <list itemType="integer"/>
-            </simpleType>
-            <simpleType>
-                <restriction base="float"/>
-            </simpleType>
-            <simpleType>
-                <restriction base="int"/>
-            </simpleType>
-            <simpleType>
-                <restriction base="string"/>
-            </simpleType>
-        </union>
-    </simpleType>
-
-    <complexType name="complex_puppet">
-        <
-    </complexType>
-</schema>
\ No newline at end of file

From 6942be8ac90cce426151372c7d36bcae369e2a7e Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 23 Oct 2019 09:47:49 +0200
Subject: [PATCH 12/34] Optimize qname_to_prefixed() and get_namespace()
 helpers

  - use_empty optional argument added to qname_to_prefixed()
---
 xmlschema/namespaces.py            |  3 +++
 xmlschema/qnames.py                | 32 +++++++++++++++++-------------
 xmlschema/tests/test_helpers.py    | 18 ++++++++++++++++-
 xmlschema/validators/exceptions.py |  5 +++--
 4 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/xmlschema/namespaces.py b/xmlschema/namespaces.py
index 44cd453..67f8e4b 100644
--- a/xmlschema/namespaces.py
+++ b/xmlschema/namespaces.py
@@ -70,6 +70,9 @@ NAMESPACE_PATTERN = re.compile(r'{([^}]*)}')
 
 
 def get_namespace(name):
+    if not name or name[0] != '{':
+        return ''
+
     try:
         return NAMESPACE_PATTERN.match(name).group(1)
     except (AttributeError, TypeError):
diff --git a/xmlschema/qnames.py b/xmlschema/qnames.py
index eb4f27d..0f80411 100644
--- a/xmlschema/qnames.py
+++ b/xmlschema/qnames.py
@@ -224,34 +224,38 @@ def local_name(qname):
         return qname
 
 
-def qname_to_prefixed(qname, namespaces):
+def qname_to_prefixed(qname, namespaces, use_empty=True):
     """
-    Transforms a fully qualified name into a prefixed name using a namespace map.
-    Returns the *qname* argument if it's not a fully qualified name or if it has
-    boolean value `False`.
+    Maps a QName in extended format to a QName in prefixed format.
+    Do not change local names and QNames in prefixed format.
 
-    :param qname: an extended QName or a local name.
+    :param qname: a QName or a local name.
     :param namespaces: a map from prefixes to namespace URIs.
+    :param use_empty: if `True` use the empty prefix for mapping.
     :return: a QName in prefixed format or a local name.
     """
-    if not qname:
+    if not qname or qname[0] != '{':
         return qname
 
     namespace = get_namespace(qname)
-    for prefix, uri in sorted(filter(lambda x: x[1] == namespace, namespaces.items()), reverse=True):
-        if not uri:
-            return '%s:%s' % (prefix, qname) if prefix else qname
-        elif prefix:
-            return qname.replace('{%s}' % uri, '%s:' % prefix)
-        else:
-            return qname.replace('{%s}' % uri, '')
+    prefixes = [x for x in namespaces if namespaces[x] == namespace]
+
+    if not prefixes:
+        return qname
+    elif prefixes[0]:
+        return '%s:%s' % (prefixes[0], qname.split('}', 1)[1])
+    elif len(prefixes) > 1:
+        return '%s:%s' % (prefixes[1], qname.split('}', 1)[1])
+    elif use_empty:
+        return qname.split('}', 1)[1]
     else:
         return qname
 
 
 def qname_to_extended(qname, namespaces):
     """
-    Converts a QName in prefixed format or a local name to the extended QName format.
+    Maps a QName in prefixed format or a local name to the extended QName format.
+    Local names are mapped if *namespaces* has a not empty default namespace.
 
     :param qname: a QName in prefixed format or a local name.
     :param namespaces: a map from prefixes to namespace URIs.
diff --git a/xmlschema/tests/test_helpers.py b/xmlschema/tests/test_helpers.py
index be195ef..5a9c894 100644
--- a/xmlschema/tests/test_helpers.py
+++ b/xmlschema/tests/test_helpers.py
@@ -40,6 +40,9 @@ class TestHelpers(unittest.TestCase):
         self.assertEqual(get_namespace(XSD_SIMPLE_TYPE), XSD_NAMESPACE)
         self.assertEqual(get_namespace(''), '')
         self.assertEqual(get_namespace(None), '')
+        self.assertEqual(get_namespace('{}name'), '')
+        self.assertEqual(get_namespace('{  }name'), '  ')
+        self.assertEqual(get_namespace('{ ns }name'), ' ns ')
 
     def test_get_qname_functions(self):
         self.assertEqual(get_qname(XSD_NAMESPACE, 'element'), XSD_ELEMENT)
@@ -81,8 +84,21 @@ class TestHelpers(unittest.TestCase):
         self.assertEqual(qname_to_prefixed('', {}), '')
 
         self.assertEqual(qname_to_prefixed('type', {'': XSI_NAMESPACE}), 'type')
-        self.assertEqual(qname_to_prefixed('type', {'ns': ''}), 'ns:type')
         self.assertEqual(qname_to_prefixed('type', {'': ''}), 'type')
+        self.assertEqual(qname_to_prefixed('{}type', {'': ''}), 'type')
+        self.assertEqual(qname_to_prefixed('{}type', {'': ''}, use_empty=False), '{}type')
+
+        # Attention! in XML the empty namespace (that means no namespace) can be
+        # associated only with empty prefix, so these cases should never happen.
+        self.assertEqual(qname_to_prefixed('{}type', {'p': ''}), 'p:type')
+        self.assertEqual(qname_to_prefixed('type', {'p': ''}), 'type')
+
+        self.assertEqual(qname_to_prefixed('{ns}type', {'': 'ns'}, use_empty=True), 'type')
+        self.assertEqual(qname_to_prefixed('{ns}type', {'': 'ns'}, use_empty=False), '{ns}type')
+        self.assertEqual(qname_to_prefixed('{ns}type', {'': 'ns', 'p': 'ns'}, use_empty=True), 'p:type')
+        self.assertEqual(qname_to_prefixed('{ns}type', {'': 'ns', 'p': 'ns'}, use_empty=False), 'p:type')
+        self.assertEqual(qname_to_prefixed('{ns}type', {'': 'ns', 'p': 'ns0'}, use_empty=True), 'type')
+        self.assertEqual(qname_to_prefixed('{ns}type', {'': 'ns', 'p': 'ns0'}, use_empty=False), '{ns}type')
 
     def test_get_xsd_annotation(self):
         elem = etree_element(XSD_SCHEMA)
diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py
index 3ed988f..4ff969a 100644
--- a/xmlschema/validators/exceptions.py
+++ b/xmlschema/validators/exceptions.py
@@ -15,6 +15,7 @@ from __future__ import unicode_literals
 
 from ..compat import PY3, string_base_type
 from ..exceptions import XMLSchemaException, XMLSchemaWarning, XMLSchemaValueError
+from ..namespaces import get_namespace
 from ..qnames import qname_to_prefixed
 from ..etree import etree_tostring, etree_getpath
 from ..helpers import is_etree_element
@@ -317,11 +318,11 @@ class XMLSchemaChildrenValidationError(XMLSchemaValidationError):
         self.occurs = occurs
         self.expected = expected
 
-        tag = qname_to_prefixed(elem.tag, validator.namespaces)
+        tag = qname_to_prefixed(elem.tag, validator.namespaces, use_empty=False)
         if index >= len(elem):
             reason = "The content of element %r is not complete." % tag
         else:
-            child_tag = qname_to_prefixed(elem[index].tag, validator.namespaces)
+            child_tag = qname_to_prefixed(elem[index].tag, validator.namespaces, use_empty=False)
             reason = "Unexpected child with tag %r at position %d." % (child_tag, index + 1)
 
         if occurs and particle.is_missing(occurs):

From a374d1580573b9b220334c079a5ed796276c11a1 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 24 Oct 2019 06:37:31 +0200
Subject: [PATCH 13/34] Fix resource tests for Python 2

---
 doc/usage.rst                                 | 15 ++++++++++++---
 xmlschema/resources.py                        |  8 ++++++--
 xmlschema/tests/test_resources.py             |  7 +++++--
 xmlschema/tests/validation/test_validation.py |  9 ++++++++-
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/doc/usage.rst b/doc/usage.rst
index fda3cde..bb22bff 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -40,7 +40,7 @@ Otherwise the argument can be also an opened file-like object:
 .. doctest::
 
     >>> import xmlschema
-    >>> schema_file = open('xmlschema/tests/test_cases/examples/vehicles/vehicles.xsd')
+    >>> schema_file = open('xmlschema/tests/test_cases/examples/collection/collection.xsd')
     >>> schema = xmlschema.XMLSchema(schema_file)
 
 Alternatively you can pass a string containing the schema definition:
@@ -54,8 +54,8 @@ Alternatively you can pass a string containing the schema definition:
     ... </xs:schema>
     ... """)
 
-this option might not works when the schema includes other local subschemas, because the package
-cannot knows anything about the schema's source location:
+Strings and file-like objects might not work when the schema includes other local subschemas,
+because the package cannot knows anything about the schema's source location:
 
 .. doctest::
 
@@ -73,6 +73,15 @@ cannot knows anything about the schema's source location:
 
     Path: /xs:schema/xs:element/xs:complexType/xs:sequence/xs:element
 
+In these cases you can provide an appropriate *base_url* optional argument to define the
+reference directory path for other includes and imports:
+
+.. doctest::
+
+    >>> import xmlschema
+    >>> schema_file = open('xmlschema/tests/test_cases/examples/vehicles/vehicles.xsd')
+    >>> schema = xmlschema.XMLSchema(schema_file, base_url='xmlschema/tests/test_cases/examples/vehicles/')
+
 
 XSD declarations
 ----------------
diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 94832fb..55ad1ab 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -551,14 +551,18 @@ class XMLResource(object):
             return self.source.seek(position)
 
         try:
-            return self.source.seek(position)
+            value = self.source.seek(position)
         except AttributeError:
             pass
+        else:
+            return value if PY3 else position
 
         try:
-            return self.source.fp.seek(position)
+            value = self.source.fp.seek(position)
         except AttributeError:
             pass
+        else:
+            return value if PY3 else position
 
     def close(self):
         """
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 38f94a4..447ddad 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -138,8 +138,11 @@ class TestResources(unittest.TestCase):
         ambiguous_path = casepath('resources/dummy file #2.txt')
         self.assertTrue(fetch_resource(ambiguous_path).endswith('dummy file %232.txt'))
 
-        with urlopen(fetch_resource(ambiguous_path)) as res:
+        res = urlopen(fetch_resource(ambiguous_path))
+        try:
             self.assertEqual(res.read(), b'DUMMY CONTENT')
+        finally:
+            res.close()
 
     def test_fetch_namespaces(self):
         self.assertFalse(fetch_namespaces(casepath('resources/malformed.xml')))
@@ -570,7 +573,7 @@ class TestResources(unittest.TestCase):
             self.assertEqual(set(resource.get_namespaces().keys()), {'vh', 'xsi'})
             self.assertFalse(xml_file.closed)
 
-            
+
 if __name__ == '__main__':
     from xmlschema.tests import print_test_header
 
diff --git a/xmlschema/tests/validation/test_validation.py b/xmlschema/tests/validation/test_validation.py
index 3ba4ba4..1e4a10b 100644
--- a/xmlschema/tests/validation/test_validation.py
+++ b/xmlschema/tests/validation/test_validation.py
@@ -10,6 +10,7 @@
 # @author Davide Brunato <brunato@sissa.it>
 #
 import unittest
+import sys
 
 import xmlschema
 from xmlschema import XMLSchemaValidationError
@@ -55,7 +56,13 @@ class TestValidation(XsdValidatorTestCase):
             path_line = str(err).splitlines()[-1]
         else:
             path_line = ''
-        self.assertEqual('Path: /vhx:vehicles/vhx:cars', path_line)
+
+        if sys.version_info >= (3, 6):
+            self.assertEqual('Path: /vhx:vehicles/vhx:cars', path_line)
+        else:
+            self.assertTrue(
+                'Path: /vh:vehicles/vh:cars' == path_line or 'Path: /vhx:vehicles/vhx:cars', path_line
+            )  # Due to unordered dicts
 
         # Issue #80
         vh_2_xt = ElementTree.parse(vh_2_file)

From df6eb235167a70ac58943f51ea51967a52191867 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 24 Oct 2019 22:13:06 +0200
Subject: [PATCH 14/34] Add XML data depth limits

  - Add module xmlschema.limits to store processing limits
  - Add max_depth optional argument to decode methods
  - Code cleaning for iter_decode() kwargs (elements and groups)
---
 doc/usage.rst                                 | 79 ++++++++++++-------
 xmlschema/__init__.py                         |  1 +
 xmlschema/limits.py                           | 21 +++++
 xmlschema/tests/validation/test_validation.py | 24 +++++-
 .../tests/validators/test_schema_class.py     |  6 +-
 xmlschema/validators/elements.py              | 62 +++++++++------
 xmlschema/validators/exceptions.py            |  1 -
 xmlschema/validators/groups.py                | 56 +++++++------
 xmlschema/validators/models.py                | 23 +++---
 xmlschema/validators/schema.py                | 38 +++------
 xmlschema/validators/xsdbase.py               | 22 ++++++
 11 files changed, 209 insertions(+), 124 deletions(-)
 create mode 100644 xmlschema/limits.py

diff --git a/doc/usage.rst b/doc/usage.rst
index bb22bff..6087211 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -526,35 +526,6 @@ For example you can build a schema using a *strict* mode and then decode XML dat
 using the *validation* argument setted to 'lax'.
 
 
-XML entity-based attacks protection
------------------------------------
-
-The XML data resource loading is protected using the  `SafeXMLParser` class, a subclass of
-the pure Python version of XMLParser that forbids the use of entities.
-The protection is applied both to XSD schemas and to XML data. The usage of this feature is
-regulated by the XMLSchema's argument *defuse*.
-For default this argument has value *'remote'* that means the protection on XML data is
-applied only to data loaded from remote. Other values for this argument can be *'always'*
-and *'never'*.
-
-
-Limit on model groups checking
-------------------------------
-
-From release v1.0.11 the model groups of the schemas are checked against restriction violations
-and *Unique Particle Attribution* violations.
-
-To avoids XSD model recursion attacks a limit of ``MAX_MODEL_DEPTH = 15`` is set. If this limit
-is exceeded an ``XMLSchemaModelDepthError`` is raised, the error is caught and a warning is generated.
-If you need to set an higher limit for checking all your groups you can import the library and change
-the value in the specific module that processes the model checks:
-
-.. doctest::
-
-    >>> import xmlschema
-    >>> xmlschema.validators.models.MAX_MODEL_DEPTH = 20
-
-
 Lazy validation
 ---------------
 
@@ -570,3 +541,53 @@ From release v1.0.14 XSD 1.1 support has been added to the library through the c
 :class:`XMLSchema11`. You have to use this class for XSD 1.1 schemas instead the default
 class :class:`XMLSchema` that is still linked to XSD 1.0 validator :class:`XMLSchema10`.
 From next minor release (v1.1) the default class will become :class:`XMLSchema11`.
+
+
+XML entity-based attacks protection
+...................................
+
+The XML data resource loading is protected using the  `SafeXMLParser` class, a subclass of
+the pure Python version of XMLParser that forbids the use of entities.
+The protection is applied both to XSD schemas and to XML data. The usage of this feature is
+regulated by the XMLSchema's argument *defuse*.
+For default this argument has value *'remote'* that means the protection on XML data is
+applied only to data loaded from remote. Other values for this argument can be *'always'*
+and *'never'*.
+
+Processing limits
+-----------------
+
+From release v1.0.16 a module has been added in order to group constants that define
+processing limits, generally to protect against attacks prepared to exhaust system
+resources. These limits usually don't need to be changed, but this possibility has
+been left at the module level for situations where a different setting is needed.
+
+Limit on XSD model groups checking
+..................................
+
+Model groups of the schemas are checked against restriction violations and *Unique Particle
+Attribution* violations. To avoids XSD model recursion attacks a depth limit of 15 levels
+is set. If this limit is exceeded an ``XMLSchemaModelDepthError`` is raised, the error is
+caught and a warning is generated. If you need to set an higher limit for checking all your
+groups you can import the library and change the value of ``MAX_MODEL_DEPTH`` in the limits
+module:
+
+.. doctest::
+
+    >>> import xmlschema
+    >>> xmlschema.limits.MAX_MODEL_DEPTH = 20
+
+
+Limit on XML data depth
+.......................
+
+A limit of 9999 on maximum depth is set for XML validation/decoding/encoding to avoid
+attacks based on extremely deep XML data. To increase or decrease this limit change the
+value of ``MAX_XML_DEPTH`` in the module *limits* after the import of the package:
+
+.. doctest::
+
+    >>> import xmlschema
+    >>> xmlschema.limits.MAX_XML_DEPTH = 1000
+
+
diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py
index d800a17..cfcf02e 100644
--- a/xmlschema/__init__.py
+++ b/xmlschema/__init__.py
@@ -8,6 +8,7 @@
 #
 # @author Davide Brunato <brunato@sissa.it>
 #
+from . import limits
 from .exceptions import XMLSchemaException, XMLSchemaRegexError, XMLSchemaURLError, \
     XMLSchemaNamespaceError
 from .etree import etree_tostring
diff --git a/xmlschema/limits.py b/xmlschema/limits.py
new file mode 100644
index 0000000..9ef9489
--- /dev/null
+++ b/xmlschema/limits.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c), 2016-2019, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+"""Package protection limits. Values can be changed after import to set different limits."""
+
+MAX_XML_DEPTH = 9999
+"""
+Maximum depth of XML data. An `XMLSchemaValidationError` is raised if this limit is exceeded.
+"""
+
+MAX_MODEL_DEPTH = 15
+"""
+Maximum XSD model group depth. An `XMLSchemaModelDepthError` is raised if this limit is exceeded.
+"""
diff --git a/xmlschema/tests/validation/test_validation.py b/xmlschema/tests/validation/test_validation.py
index 1e4a10b..083bbd1 100644
--- a/xmlschema/tests/validation/test_validation.py
+++ b/xmlschema/tests/validation/test_validation.py
@@ -77,13 +77,33 @@ class TestValidation(XsdValidatorTestCase):
 
         self.assertRaises(XMLSchemaValidationError, xsd_element.decode, source.root, namespaces=namespaces)
 
-        # Testing adding 'no_depth' argument
         for result in xsd_element.iter_decode(source.root, 'strict', namespaces=namespaces,
-                                              source=source, no_depth=True):
+                                              source=source, max_depth=1):
             del result
 
         self.assertIsNone(xmlschema.validate(self.col_xml_file, lazy=True))
 
+    def test_max_depth_argument(self):
+        schema = self.schema_class(self.col_xsd_file)
+        self.assertEqual(
+            schema.decode(self.col_xml_file, max_depth=1),
+            {'@xmlns:col': 'http://example.com/ns/collection',
+             '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
+             '@xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd'})
+
+        xmlschema.limits.MAX_XML_DEPTH = 1
+        with self.assertRaises(XMLSchemaValidationError):
+            self.assertEqual(schema.decode(self.col_xml_file))
+        xmlschema.limits.MAX_XML_DEPTH = 9999
+
+        self.assertEqual(
+            schema.decode(self.col_xml_file, max_depth=2),
+            {'@xmlns:col': 'http://example.com/ns/collection',
+             '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
+             '@xsi:schemaLocation': 'http://example.com/ns/collection collection.xsd',
+             'object': [{'@id': 'b0836217462', '@available': True},
+                        {'@id': 'b0836217463', '@available': True}]})
+
 
 class TestValidation11(TestValidation):
     schema_class = XMLSchema11
diff --git a/xmlschema/tests/validators/test_schema_class.py b/xmlschema/tests/validators/test_schema_class.py
index 45be457..1253a47 100644
--- a/xmlschema/tests/validators/test_schema_class.py
+++ b/xmlschema/tests/validators/test_schema_class.py
@@ -142,10 +142,12 @@ class TestXMLSchema10(XsdValidatorTestCase):
                      "Remote networks are not accessible or avoid SSL verification error on Windows.")
     def test_remote_schemas_loading(self):
         col_schema = self.schema_class("https://raw.githubusercontent.com/brunato/xmlschema/master/"
-                                       "xmlschema/tests/test_cases/examples/collection/collection.xsd")
+                                       "xmlschema/tests/test_cases/examples/collection/collection.xsd",
+                                       timeout=300)
         self.assertTrue(isinstance(col_schema, self.schema_class))
         vh_schema = self.schema_class("https://raw.githubusercontent.com/brunato/xmlschema/master/"
-                                      "xmlschema/tests/test_cases/examples/vehicles/vehicles.xsd")
+                                      "xmlschema/tests/test_cases/examples/vehicles/vehicles.xsd",
+                                      timeout=300)
         self.assertTrue(isinstance(vh_schema, self.schema_class))
 
     def test_schema_defuse(self):
diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
index a5fdc3f..2b7fe2e 100644
--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@@ -458,14 +458,12 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
             text = self.fixed if self.fixed is not None else self.default
         return self.type.text_decode(text)
 
-    def iter_decode(self, elem, validation='lax', converter=None, level=0, **kwargs):
+    def iter_decode(self, elem, validation='lax', **kwargs):
         """
         Creates an iterator for decoding an Element instance.
 
         :param elem: the Element that has to be decoded.
         :param validation: the validation mode, can be 'lax', 'strict' or 'skip.
-        :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the decoding.
-        :param level: the depth of the element in the tree structure.
         :param kwargs: keyword arguments for the decoding process.
         :return: yields a decoded object, eventually preceded by a sequence of \
         validation or decoding errors.
@@ -473,8 +471,19 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
         if self.abstract:
             yield self.validation_error(validation, "cannot use an abstract element for validation", elem, **kwargs)
 
-        if not isinstance(converter, XMLSchemaConverter):
-            converter = self.schema.get_converter(converter, level=level, **kwargs)
+        try:
+            level = kwargs['level']
+        except KeyError:
+            level = 0
+
+        try:
+            converter = kwargs['converter']
+        except KeyError:
+            converter = kwargs['converter'] = self.get_converter(**kwargs)
+        else:
+            if not isinstance(converter, XMLSchemaConverter):
+                converter = kwargs['converter'] = self.get_converter(**kwargs)
+
         inherited = kwargs.get('inherited')
         value = content = attributes = None
 
@@ -492,7 +501,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
 
         # Decode attributes
         attribute_group = self.get_attributes(xsd_type)
-        for result in attribute_group.iter_decode(elem.attrib, validation, level=level, **kwargs):
+        for result in attribute_group.iter_decode(elem.attrib, validation, **kwargs):
             if isinstance(result, XMLSchemaValidationError):
                 yield self.validation_error(validation, result, elem, **kwargs)
             else:
@@ -529,8 +538,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
                 for error in assertion(elem, **kwargs):
                     yield self.validation_error(validation, error, **kwargs)
 
-            for result in xsd_type.content_type.iter_decode(
-                    elem, validation, converter, level + 1, **kwargs):
+            for result in xsd_type.content_type.iter_decode(elem, validation, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     yield self.validation_error(validation, result, elem, **kwargs)
                 else:
@@ -601,29 +609,40 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
             del content
 
         if validation != 'skip':
-            for constraint in self.identities.values():
-                if isinstance(constraint, XsdKeyref) and '_no_deep' in kwargs:  # TODO: Complete lazy validation
-                    continue
-                for error in constraint(elem, converter):
-                    yield self.validation_error(validation, error, elem, **kwargs)
+            if 'max_depth' in kwargs:
+                # Don't check key references with lazy or shallow validation
+                for constraint in filter(lambda x: not isinstance(x, XsdKeyref), self.identities.values()):
+                    for error in constraint(elem, converter):
+                        yield self.validation_error(validation, error, elem, **kwargs)
+            else:
+                for constraint in self.identities.values():
+                    for error in constraint(elem, converter):
+                        yield self.validation_error(validation, error, elem, **kwargs)
 
-    def iter_encode(self, obj, validation='lax', converter=None, level=0, **kwargs):
+    def iter_encode(self, obj, validation='lax', **kwargs):
         """
         Creates an iterator for encoding data to an Element.
 
         :param obj: the data that has to be encoded.
         :param validation: the validation mode: can be 'lax', 'strict' or 'skip'.
-        :param converter: an :class:`XMLSchemaConverter` subclass or instance to use \
-        for the encoding.
-        :param level: the depth of the element data in the tree structure.
         :param kwargs: keyword arguments for the encoding process.
         :return: yields an Element, eventually preceded by a sequence of \
         validation or encoding errors.
         """
-        if not isinstance(converter, XMLSchemaConverter):
-            converter = self.schema.get_converter(converter, level=level, **kwargs)
-        element_data = converter.element_encode(obj, self, level)
+        try:
+            converter = kwargs['converter']
+        except KeyError:
+            converter = kwargs['converter'] = self.get_converter(**kwargs)
+        else:
+            if not isinstance(converter, XMLSchemaConverter):
+                converter = kwargs['converter'] = self.get_converter(**kwargs)
 
+        try:
+            level = kwargs['level']
+        except KeyError:
+            level = 0
+
+        element_data = converter.element_encode(obj, self, level)
         errors = []
         tag = element_data.tag
         text = None
@@ -683,8 +702,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
                     else:
                         text = result
         else:
-            for result in xsd_type.content_type.iter_encode(
-                    element_data, validation, converter, level + 1, **kwargs):
+            for result in xsd_type.content_type.iter_encode(element_data, validation, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     errors.append(result)
                 elif result:
diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py
index 4ff969a..d47d60a 100644
--- a/xmlschema/validators/exceptions.py
+++ b/xmlschema/validators/exceptions.py
@@ -15,7 +15,6 @@ from __future__ import unicode_literals
 
 from ..compat import PY3, string_base_type
 from ..exceptions import XMLSchemaException, XMLSchemaWarning, XMLSchemaValueError
-from ..namespaces import get_namespace
 from ..qnames import qname_to_prefixed
 from ..etree import etree_tostring, etree_getpath
 from ..helpers import is_etree_element
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index e5345b1..e248c0c 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -14,6 +14,7 @@ This module contains classes for XML Schema model groups.
 from __future__ import unicode_literals
 import warnings
 
+from .. import limits
 from ..compat import unicode_type
 from ..exceptions import XMLSchemaValueError
 from ..etree import etree_element
@@ -555,15 +556,12 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
                 msg = "Maybe a not equivalent type table between elements %r and %r." % (self, xsd_element)
                 warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3)
 
-    def iter_decode(self, elem, validation='lax', converter=None, level=0, **kwargs):
+    def iter_decode(self, elem, validation='lax', **kwargs):
         """
         Creates an iterator for decoding an Element content.
 
         :param elem: the Element that has to be decoded.
         :param validation: the validation mode, can be 'lax', 'strict' or 'skip.
-        :param converter: an :class:`XMLSchemaConverter` subclass or instance \
-        to use for the decoding.
-        :param level: the depth of the element in the tree structure.
         :param kwargs: keyword arguments for the decoding process.
         :return: yields a list of 3-tuples (key, decoded data, decoder), \
         eventually preceded by a sequence of validation or decoding errors.
@@ -590,16 +588,21 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
                 result_list.append((cdata_index, text, None))
                 cdata_index += 1
 
-        model = ModelVisitor(self)
-        errors = []
+        level = kwargs['level'] = kwargs.pop('level', 0) + 1
+        if level > limits.MAX_XML_DEPTH:
+            reason = "XML data depth exceeded (MAX_XML_DEPTH=%r)" % limits.MAX_XML_DEPTH
+            self.validation_error('strict', reason, elem, **kwargs)
 
         try:
-            default_namespace = converter.get('')
-        except (AttributeError, TypeError):
-            converter = self.schema.get_converter(converter, level=level, **kwargs)
-            default_namespace = converter.get('')
+            converter = kwargs['converter']
+        except KeyError:
+            converter = kwargs['converter'] = self.get_converter(**kwargs)
 
+        default_namespace = converter.get('')
+        model = ModelVisitor(self)
+        errors = []
         model_broken = False
+
         for index, child in enumerate(elem):
             if callable(child.tag):
                 continue  # child is a <class 'lxml.etree._Comment'>
@@ -646,12 +649,13 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
                         xsd_element = None
                         model_broken = True
 
-            if xsd_element is None or kwargs.get('no_depth'):
-                # TODO: use a default decoder str-->str??
+            if 'max_depth' in kwargs and kwargs['max_depth'] <= level:
+                continue
+            elif xsd_element is None:
+                # TODO: apply a default decoder str-->str??
                 continue
 
-            for result in xsd_element.iter_decode(
-                    child, validation, converter=converter, level=level, **kwargs):
+            for result in xsd_element.iter_decode(child, validation, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
@@ -678,16 +682,12 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
 
         yield result_list
 
-    def iter_encode(self, element_data, validation='lax', converter=None, level=0, indent=4, **kwargs):
+    def iter_encode(self, element_data, validation='lax', **kwargs):
         """
         Creates an iterator for encoding data to a list containing Element data.
 
         :param element_data: an ElementData instance with unencoded data.
         :param validation: the validation mode: can be 'lax', 'strict' or 'skip'.
-        :param converter: an :class:`XMLSchemaConverter` subclass or instance to use \
-        for the encoding.
-        :param level: the depth of the element data in the tree structure.
-        :param indent: number of spaces for XML indentation (default is 4).
         :param kwargs: keyword arguments for the encoding process.
         :return: yields a couple with the text of the Element and a list of 3-tuples \
         (key, decoded data, decoder), eventually preceded by a sequence of validation \
@@ -697,19 +697,26 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
             yield element_data.content
             return
 
+        level = kwargs['level'] = kwargs.pop('level', 0) + 1
         errors = []
         text = None
         children = []
+        try:
+            indent = kwargs['indent']
+        except KeyError:
+            indent = 4
+
         padding = '\n' + ' ' * indent * level
 
         try:
-            default_namespace = converter.get('')
-        except (AttributeError, TypeError):
-            converter = self.schema.get_converter(converter, level=level, **kwargs)
-            default_namespace = converter.get('')
+            converter = kwargs['converter']
+        except KeyError:
+            converter = kwargs['converter'] = self.get_converter(**kwargs)
 
+        default_namespace = converter.get('')
         model = ModelVisitor(self)
         cdata_index = 0
+
         if isinstance(element_data.content, dict) or kwargs.get('unordered'):
             content = model.iter_unordered_content(element_data.content)
         elif not isinstance(element_data.content, list):
@@ -766,8 +773,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
                                 yield self.validation_error(validation, reason, value, **kwargs)
                             continue
 
-            for result in xsd_element.iter_encode(
-                    value, validation, converter=converter, level=level, indent=indent, **kwargs):
+            for result in xsd_element.iter_encode(value, validation, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index 7a904f4..77c237f 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -14,17 +14,13 @@ This module contains classes and functions for processing XSD content models.
 from __future__ import unicode_literals
 from collections import defaultdict, deque, Counter
 
+from .. import limits
 from ..compat import PY3, MutableSequence
 from ..exceptions import XMLSchemaValueError
 from .exceptions import XMLSchemaModelError, XMLSchemaModelDepthError
 from .xsdbase import ParticleMixin
 from .wildcards import XsdAnyElement, Xsd11AnyElement
 
-MAX_MODEL_DEPTH = 15
-"""Limit depth for safe visiting of models"""
-
-XSD_GROUP_MODELS = {'sequence', 'choice', 'all'}
-
 
 class ModelGroup(MutableSequence, ParticleMixin):
     """
@@ -34,7 +30,6 @@ class ModelGroup(MutableSequence, ParticleMixin):
     parent = None
 
     def __init__(self, model):
-        assert model in XSD_GROUP_MODELS, "Not a valid value for 'model'"
         self._group = []
         self.model = model
 
@@ -61,7 +56,7 @@ class ModelGroup(MutableSequence, ParticleMixin):
 
     def __setattr__(self, name, value):
         if name == 'model' and value is not None:
-            if value not in XSD_GROUP_MODELS:
+            if value not in {'sequence', 'choice', 'all'}:
                 raise XMLSchemaValueError("invalid model group %r." % value)
             if self.model is not None and value != self.model and self.model != 'all':
                 raise XMLSchemaValueError("cannot change group model from %r to %r" % (self.model, value))
@@ -165,11 +160,11 @@ class ModelGroup(MutableSequence, ParticleMixin):
         """
         A generator function iterating elements and groups of a model group. Skips pointless groups,
         iterating deeper through them. Raises `XMLSchemaModelDepthError` if the argument *depth* is
-        over `MAX_MODEL_DEPTH` value.
+        over `limits.MAX_MODEL_DEPTH` value.
 
         :param depth: guard for protect model nesting bombs, incremented at each deepest recursion.
         """
-        if depth > MAX_MODEL_DEPTH:
+        if depth > limits.MAX_MODEL_DEPTH:
             raise XMLSchemaModelDepthError(self)
         for item in self:
             if not isinstance(item, ModelGroup):
@@ -183,11 +178,11 @@ class ModelGroup(MutableSequence, ParticleMixin):
     def iter_elements(self, depth=0):
         """
         A generator function iterating model's elements. Raises `XMLSchemaModelDepthError` if the
-        argument *depth* is over `MAX_MODEL_DEPTH` value.
+        argument *depth* is over `limits.MAX_MODEL_DEPTH` value.
 
         :param depth: guard for protect model nesting bombs, incremented at each deepest recursion.
         """
-        if depth > MAX_MODEL_DEPTH:
+        if depth > limits.MAX_MODEL_DEPTH:
             raise XMLSchemaModelDepthError(self)
         for item in self:
             if isinstance(item, ModelGroup):
@@ -203,12 +198,12 @@ class ModelGroup(MutableSequence, ParticleMixin):
         :raises: an `XMLSchemaModelError` at first violated constraint.
         """
         def safe_iter_path(group, depth):
-            if depth > MAX_MODEL_DEPTH:
+            if not depth:
                 raise XMLSchemaModelDepthError(group)
             for item in group:
                 if isinstance(item, ModelGroup):
                     current_path.append(item)
-                    for _item in safe_iter_path(item, depth + 1):
+                    for _item in safe_iter_path(item, depth - 1):
                         yield _item
                     current_path.pop()
                 else:
@@ -221,7 +216,7 @@ class ModelGroup(MutableSequence, ParticleMixin):
         except AttributeError:
             any_element = None
 
-        for e in safe_iter_path(self, 0):
+        for e in safe_iter_path(self, limits.MAX_MODEL_DEPTH):
             for pe, previous_path in paths.values():
                 # EDC check
                 if not e.is_consistent(pe) or any_element and not any_element.is_consistent(pe):
diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py
index 1277d26..685f5dd 100644
--- a/xmlschema/validators/schema.py
+++ b/xmlschema/validators/schema.py
@@ -822,27 +822,6 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         except KeyError:
             return []
 
-    def get_converter(self, converter=None, namespaces=None, **kwargs):
-        """
-        Returns a new converter instance.
-
-        :param converter: can be a converter class or instance. If it's an instance \
-        the new instance is copied from it and configured with the provided arguments.
-        :param namespaces: is an optional mapping from namespace prefix to URI.
-        :param kwargs: optional arguments for initialize the converter instance.
-        :return: a converter instance.
-        """
-        if converter is None:
-            converter = getattr(self, 'converter', XMLSchemaConverter)
-
-        if isinstance(converter, XMLSchemaConverter):
-            return converter.copy(namespaces=namespaces, **kwargs)
-        elif issubclass(converter, XMLSchemaConverter):
-            return converter(namespaces, **kwargs)
-        else:
-            msg = "'converter' argument must be a %r subclass or instance: %r"
-            raise XMLSchemaTypeError(msg % (XMLSchemaConverter, converter))
-
     def get_element(self, tag, path=None, namespaces=None):
         if not path:
             return self.find(tag, namespaces)
@@ -1223,16 +1202,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         inherited = {}
 
         if source.is_lazy() and path is None:
-            # TODO: Document validation in lazy mode.
-            # Validation is done pushing a _no_deep argument for root node and with
-            # a path='*' for validating children. This is a feature under test.
             xsd_element = self.get_element(source.root.tag, schema_path)
             if xsd_element is None:
-                yield self.validation_error('lax', "%r is not an element of the schema" % source.root, source.root)
+                msg = "%r is not an element of the schema"
+                yield self.validation_error('lax', msg % source.root, source.root)
 
             for result in xsd_element.iter_decode(source.root, source=source, namespaces=namespaces,
-                                                  use_defaults=use_defaults, id_map=id_map, no_depth=True,
-                                                  inherited=inherited, drop_results=True):
+                                                  use_defaults=use_defaults, id_map=id_map,
+                                                  inherited=inherited, max_depth=1):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
@@ -1249,7 +1226,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
 
             for result in xsd_element.iter_decode(elem, source=source, namespaces=namespaces,
                                                   use_defaults=use_defaults, id_map=id_map,
-                                                  inherited=inherited, drop_results=True):
+                                                  inherited=inherited):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
@@ -1264,7 +1241,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
 
     def iter_decode(self, source, path=None, schema_path=None, validation='lax', process_namespaces=True,
                     namespaces=None, use_defaults=True, decimal_type=None, datetime_types=False,
-                    converter=None, filler=None, fill_missing=False, **kwargs):
+                    converter=None, filler=None, fill_missing=False, max_depth=None, **kwargs):
         """
         Creates an iterator for decoding an XML source to a data structure.
 
@@ -1292,6 +1269,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         an attribute declaration. If not provided undecodable data is replaced by `None`.
         :param fill_missing: if set to `True` the decoder fills also missing attributes. \
         The filling value is `None` or a typed value if the *filler* callback is provided.
+        :param max_depth: maximum level of decoding. For default has no limit.
         :param kwargs: keyword arguments with other options for converter and decoder.
         :return: yields a decoded data object, eventually preceded by a sequence of validation \
         or decoding errors.
@@ -1323,6 +1301,8 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
             kwargs['decimal_type'] = decimal_type
         if filler is not None:
             kwargs['filler'] = filler
+        if max_depth is not None:
+            kwargs['max_depth'] = max_depth
 
         for elem in source.iterfind(path, namespaces):
             xsd_element = self.get_element(elem.tag, schema_path, namespaces)
diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py
index 13393ee..fe04ca0 100644
--- a/xmlschema/validators/xsdbase.py
+++ b/xmlschema/validators/xsdbase.py
@@ -21,6 +21,7 @@ from ..qnames import XSD_ANNOTATION, XSD_APPINFO, XSD_DOCUMENTATION, XML_LANG, \
     get_qname, local_name, qname_to_prefixed
 from ..etree import etree_tostring
 from ..helpers import is_etree_element
+from ..converters import XMLSchemaConverter
 from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, \
     XMLSchemaDecodeError, XMLSchemaEncodeError
 
@@ -195,6 +196,27 @@ class XsdValidator(object):
             self.parse_error(msg % (value, ' | '.join(admitted_values)), elem)
             return ''
 
+    def get_converter(self, converter=None, namespaces=None, **kwargs):
+        """
+        Returns a new converter instance.
+
+        :param converter: can be a converter class or instance. If it's an instance \
+        the new instance is copied from it and configured with the provided arguments.
+        :param namespaces: is an optional mapping from namespace prefix to URI.
+        :param kwargs: optional arguments for initialize the converter instance.
+        :return: a converter instance.
+        """
+        if converter is None:
+            converter = getattr(self, 'converter', XMLSchemaConverter)
+
+        if isinstance(converter, XMLSchemaConverter):
+            return converter.copy(namespaces=namespaces, **kwargs)
+        elif issubclass(converter, XMLSchemaConverter):
+            return converter(namespaces, **kwargs)
+        else:
+            msg = "'converter' argument must be a %r subclass or instance: %r"
+            raise XMLSchemaTypeError(msg % (XMLSchemaConverter, converter))
+
 
 class XsdComponent(XsdValidator):
     """

From ded91458a142d3b5197789350fb010bcc0d0e91c Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Fri, 25 Oct 2019 10:18:34 +0200
Subject: [PATCH 15/34] Extend schema validation to match every defined global
 element

  - Should be a fix for issue #140
---
 .../features/namespaces/import-case4-1.xml    |  5 ++
 .../features/namespaces/import-case4-2.xml    |  7 ++
 .../features/namespaces/import-case4a.xsd     | 24 ++++++
 .../features/namespaces/import-case4b.xsd     | 24 ++++++
 xmlschema/tests/test_cases/testfiles          |  4 +
 xmlschema/validators/schema.py                | 83 +++++++++++++------
 6 files changed, 123 insertions(+), 24 deletions(-)
 create mode 100644 xmlschema/tests/test_cases/features/namespaces/import-case4-1.xml
 create mode 100644 xmlschema/tests/test_cases/features/namespaces/import-case4-2.xml
 create mode 100644 xmlschema/tests/test_cases/features/namespaces/import-case4a.xsd
 create mode 100644 xmlschema/tests/test_cases/features/namespaces/import-case4b.xsd

diff --git a/xmlschema/tests/test_cases/features/namespaces/import-case4-1.xml b/xmlschema/tests/test_cases/features/namespaces/import-case4-1.xml
new file mode 100644
index 0000000..07e306c
--- /dev/null
+++ b/xmlschema/tests/test_cases/features/namespaces/import-case4-1.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<b:rootB xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://example.com/xmlschema/ns-A import-case4a.xsd"
+         xmlns:b="http://example.com/xmlschema/ns-B" />
+
diff --git a/xmlschema/tests/test_cases/features/namespaces/import-case4-2.xml b/xmlschema/tests/test_cases/features/namespaces/import-case4-2.xml
new file mode 100644
index 0000000..a15a214
--- /dev/null
+++ b/xmlschema/tests/test_cases/features/namespaces/import-case4-2.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<a:rootA xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://example.com/xmlschema/ns-B import-case4b.xsd"
+         xmlns:a="http://example.com/xmlschema/ns-A"
+         xmlns:b="http://example.com/xmlschema/ns-B">
+    <b:rootB/>
+</a:rootA>
\ No newline at end of file
diff --git a/xmlschema/tests/test_cases/features/namespaces/import-case4a.xsd b/xmlschema/tests/test_cases/features/namespaces/import-case4a.xsd
new file mode 100644
index 0000000..7d87bd5
--- /dev/null
+++ b/xmlschema/tests/test_cases/features/namespaces/import-case4a.xsd
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  A valid import case: two namespaces, each one with a global element that
+  can be used as valid root element for XML instances.
+  -->
+<xs:schema
+  xmlns:xs="http://www.w3.org/2001/XMLSchema"
+  xmlns:a="http://example.com/xmlschema/ns-A"
+  xmlns:b="http://example.com/xmlschema/ns-B"
+  targetNamespace="http://example.com/xmlschema/ns-A"
+  elementFormDefault="qualified">
+
+  <xs:import schemaLocation="import-case4b.xsd" namespace="http://example.com/xmlschema/ns-B"/>
+
+  <xs:element name="rootA" type="a:rootType"/>
+
+  <xs:complexType name="rootType">
+    <xs:sequence>
+      <xs:element ref="b:rootB" minOccurs="0" />
+    </xs:sequence>
+  </xs:complexType>
+
+</xs:schema>
+
diff --git a/xmlschema/tests/test_cases/features/namespaces/import-case4b.xsd b/xmlschema/tests/test_cases/features/namespaces/import-case4b.xsd
new file mode 100644
index 0000000..4666bf5
--- /dev/null
+++ b/xmlschema/tests/test_cases/features/namespaces/import-case4b.xsd
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  A valid import case: two namespaces, each one with a global element that
+  can be used as valid root element for XML instances.
+  -->
+<xs:schema
+  xmlns:xs="http://www.w3.org/2001/XMLSchema"
+  xmlns:a="http://example.com/xmlschema/ns-A"
+  xmlns:b="http://example.com/xmlschema/ns-B"
+  targetNamespace="http://example.com/xmlschema/ns-B"
+  elementFormDefault="qualified">
+
+  <xs:import schemaLocation="import-case4a.xsd" namespace="http://example.com/xmlschema/ns-A"/>
+
+  <xs:element name="rootB" type="b:rootType"/>
+
+  <xs:complexType name="rootType">
+    <xs:sequence>
+      <xs:element ref="a:rootA" minOccurs="0" />
+    </xs:sequence>
+  </xs:complexType>
+
+</xs:schema>
+
diff --git a/xmlschema/tests/test_cases/testfiles b/xmlschema/tests/test_cases/testfiles
index 7c7d62e..5f088cb 100644
--- a/xmlschema/tests/test_cases/testfiles
+++ b/xmlschema/tests/test_cases/testfiles
@@ -60,6 +60,10 @@ features/namespaces/default_ns_valid2.xsd
 features/namespaces/import-case1.xsd --errors=1  # Unknown type
 features/namespaces/import-case2.xsd --errors=1  # Missing namespace import in imported chameleon schema
 features/namespaces/import-case3.xsd
+features/namespaces/import-case4a.xsd
+features/namespaces/import-case4b.xsd
+features/namespaces/import-case4-1.xml  # This and the next are also regression tests for issue #140
+features/namespaces/import-case4-2.xml
 features/namespaces/include-case1.xsd
 features/namespaces/include-case1bis.xsd
 features/namespaces/include-case2.xsd
diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py
index 685f5dd..198f230 100644
--- a/xmlschema/validators/schema.py
+++ b/xmlschema/validators/schema.py
@@ -33,7 +33,7 @@ from ..qnames import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \
     XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT
 from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute
 from ..namespaces import XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, VC_NAMESPACE, \
-    SCHEMAS_DIR, LOCATION_HINTS, NamespaceResourcesMap, NamespaceView
+    SCHEMAS_DIR, LOCATION_HINTS, NamespaceResourcesMap, NamespaceView, get_namespace
 from ..etree import etree_element, etree_tostring, prune_etree, ParseError
 from ..resources import is_remote_url, url_path_is_file, fetch_resource, XMLResource
 from ..converters import XMLSchemaConverter
@@ -1201,15 +1201,30 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         id_map = Counter()
         inherited = {}
 
-        if source.is_lazy() and path is None:
-            xsd_element = self.get_element(source.root.tag, schema_path)
-            if xsd_element is None:
-                msg = "%r is not an element of the schema"
-                yield self.validation_error('lax', msg % source.root, source.root)
+        namespace = source.namespace or namespaces.get('', '')
+        try:
+            schema = self.maps.namespaces[namespace][0]
+        except (KeyError, IndexError):
+            reason = 'the namespace {!r} is not loaded'.format(namespace)
+            yield self.validation_error('lax', reason, source.root, source, namespaces)
+            return
 
-            for result in xsd_element.iter_decode(source.root, source=source, namespaces=namespaces,
-                                                  use_defaults=use_defaults, id_map=id_map,
-                                                  inherited=inherited, max_depth=1):
+        kwargs = {
+            'source': source,
+            'namespaces': namespaces,
+            'use_defaults': use_defaults,
+            'id_map': id_map,
+            'inherited': inherited
+        }
+
+        if source.is_lazy() and path is None:
+            xsd_element = schema.get_element(source.root.tag, schema_path, namespaces)
+            if xsd_element is None:
+                reason = "{!r} is not an element of the schema".format(source.root)
+                yield schema.validation_error('lax', reason, source.root, source, namespaces)
+                return
+
+            for result in xsd_element.iter_decode(source.root, max_depth=1, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
@@ -1220,13 +1235,13 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
                 schema_path = '/%s/*' % source.root.tag
 
         for elem in source.iterfind(path, namespaces):
-            xsd_element = self.get_element(elem.tag, schema_path, self.namespaces)
+            xsd_element = schema.get_element(elem.tag, schema_path, namespaces)
             if xsd_element is None:
-                yield self.validation_error('lax', "%r is not an element of the schema" % elem, elem)
+                reason = "{!r} is not an element of the schema".format(elem)
+                yield schema.validation_error('lax', reason, elem, source, namespaces)
+                return
 
-            for result in xsd_element.iter_decode(elem, source=source, namespaces=namespaces,
-                                                  use_defaults=use_defaults, id_map=id_map,
-                                                  inherited=inherited):
+            for result in xsd_element.iter_decode(elem, **kwargs):
                 if isinstance(result, XMLSchemaValidationError):
                     yield result
                 else:
@@ -1269,7 +1284,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         an attribute declaration. If not provided undecodable data is replaced by `None`.
         :param fill_missing: if set to `True` the decoder fills also missing attributes. \
         The filling value is `None` or a typed value if the *filler* callback is provided.
-        :param max_depth: maximum level of decoding. For default has no limit.
+        :param max_depth: maximum level of decoding, for default there is no limit.
         :param kwargs: keyword arguments with other options for converter and decoder.
         :return: yields a decoded data object, eventually preceded by a sequence of validation \
         or decoding errors.
@@ -1304,15 +1319,26 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         if max_depth is not None:
             kwargs['max_depth'] = max_depth
 
+        namespace = source.namespace or namespaces.get('', '')
+        try:
+            schema = self.maps.namespaces[namespace][0]
+        except (KeyError, IndexError):
+            reason = 'the namespace {!r} is not loaded'.format(namespace)
+            yield self.validation_error('lax', reason, source.root, source, namespaces)
+            return
+
         for elem in source.iterfind(path, namespaces):
-            xsd_element = self.get_element(elem.tag, schema_path, namespaces)
+            xsd_element = schema.get_element(elem.tag, schema_path, namespaces)
             if xsd_element is None:
-                yield self.validation_error(validation, "%r is not an element of the schema" % elem, elem)
+                reason = "{!r} is not an element of the schema".format(elem)
+                yield schema.validation_error('lax', reason, elem, source, namespaces)
+                return
 
             for obj in xsd_element.iter_decode(
-                    elem, validation, converter=converter, source=source, namespaces=namespaces,
-                    use_defaults=use_defaults, datetime_types=datetime_types,
-                    fill_missing=fill_missing, id_map=id_map, inherited=inherited, **kwargs):
+                    elem, validation, converter=converter, source=source,
+                    namespaces=namespaces, use_defaults=use_defaults,
+                    datetime_types=datetime_types, fill_missing=fill_missing,
+                    id_map=id_map, inherited=inherited, **kwargs):
                 yield obj
 
         for k, v in id_map.items():
@@ -1374,7 +1400,16 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         namespaces = {} if namespaces is None else namespaces.copy()
         converter = self.get_converter(converter, namespaces, **kwargs)
 
-        if path is not None:
+        namespace = get_namespace(path) or namespaces.get('', '')
+        if namespace:
+            try:
+                schema = self.maps.namespaces[namespace][0]
+            except (KeyError, IndexError):
+                reason = 'the namespace {!r} is not loaded'.format(namespace)
+                raise XMLSchemaEncodeError(self, obj, self, reason, namespaces=namespaces)
+            else:
+                xsd_element = schema.find(path, namespaces=namespaces)
+        elif path is not None:
             xsd_element = self.find(path, namespaces=namespaces)
         elif isinstance(obj, dict) and len(obj) == 1:
             xsd_element = self.elements.get(list(obj.keys())[0])
@@ -1386,10 +1421,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
 
         if not isinstance(xsd_element, XsdElement):
             if path is not None:
-                msg = "the path %r doesn't match any element of the schema!" % path
+                reason = "the path %r doesn't match any element of the schema!" % path
             else:
-                msg = "unable to select an element for decoding data, provide a valid 'path' argument."
-            yield XMLSchemaEncodeError(self, obj, self.elements, reason=msg)
+                reason = "unable to select an element for decoding data, provide a valid 'path' argument."
+            raise XMLSchemaEncodeError(self, obj, self.elements, reason, namespaces=namespaces)
         else:
             for result in xsd_element.iter_encode(obj, validation, converter=converter,
                                                   unordered=unordered, **kwargs):

From c963970549a2e58f885107aae377f869b7c69793 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Fri, 25 Oct 2019 23:02:13 +0200
Subject: [PATCH 16/34] Fix openContent's appliesToEmpty attribute use

---
 xmlschema/validators/complex_types.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py
index e45ff30..bcff57c 100644
--- a/xmlschema/validators/complex_types.py
+++ b/xmlschema/validators/complex_types.py
@@ -344,11 +344,9 @@ class XsdComplexType(XsdType, ValidationMixin):
                 "derived an empty content from base type that has not empty content.", elem
             )
 
-        if not self.open_content:
-            if self.schema.default_open_content:
+        if not self.open_content and self.schema.default_open_content:
+            if content_type or self.schema.default_open_content.applies_to_empty:
                 self.open_content = self.schema.default_open_content
-            elif getattr(base_type, 'open_content', None):
-                self.open_content = base_type.open_content
 
         if self.open_content and content_type and \
                 not self.open_content.is_restriction(base_type.open_content):
@@ -679,7 +677,13 @@ class Xsd11ComplexType(XsdComplexType):
 
         # Add open content to complex content type
         if isinstance(self.content_type, XsdGroup):
-            open_content = self.open_content or self.schema.default_open_content
+            open_content = self.open_content
+            if open_content is not None:
+                pass
+            elif self.schema.default_open_content is not None:
+                if self.content_type or self.schema.default_open_content.applies_to_empty:
+                    open_content = self.schema.default_open_content
+
             if open_content is None:
                 pass
             elif open_content.mode == 'interleave':

From 732864edc7fd35fd4bbf4d6cbcf1c3f9728e68b6 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Sat, 26 Oct 2019 23:50:38 +0200
Subject: [PATCH 17/34] Fix xs:ID counting for nodes without parent

  - Consider attributes with level+1 as child elements
  - Clean XsdAtomicBuiltin.iter_decode() method
---
 xmlschema/tests/test_w3c_suite.py     |  3 +++
 xmlschema/validators/attributes.py    |  2 ++
 xmlschema/validators/complex_types.py |  6 ++++--
 xmlschema/validators/elements.py      |  7 ++-----
 xmlschema/validators/groups.py        |  2 +-
 xmlschema/validators/simple_types.py  | 29 ++++++++++++---------------
 6 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py
index dbeb25c..2170e46 100644
--- a/xmlschema/tests/test_w3c_suite.py
+++ b/xmlschema/tests/test_w3c_suite.py
@@ -99,6 +99,9 @@ SKIPPED_TESTS = {
     '../msData/additional/test93490_4.xml',     # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078
     '../msData/additional/test93490_8.xml',     # 4799: Idem
 
+    # Valid XML tests
+    '../ibmData/instance_invalid/S3_4_2_4/s3_4_2_4ii03.xml',  # defaultAttributeApply is true (false in comment)
+
     # Skip for missing XML version 1.1 implementation
     '../saxonData/XmlVersions/xv001.v01.xml',   # 14850
     '../saxonData/XmlVersions/xv003.v01.xml',   # 14852
diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py
index 78df62d..04f2dc2 100644
--- a/xmlschema/validators/attributes.py
+++ b/xmlschema/validators/attributes.py
@@ -594,7 +594,9 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin):
                 reason = "missing required attribute: %r" % k
                 yield self.validation_error(validation, reason, attrs, **kwargs)
 
+        kwargs['level'] = kwargs.get('level', 0) + 1
         use_defaults = kwargs.get('use_defaults', True)
+
         additional_attrs = [(k, v) for k, v in self.iter_predefined(use_defaults) if k not in attrs]
         if additional_attrs:
             attrs = {k: v for k, v in attrs.items()}
diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py
index bcff57c..86ef0cc 100644
--- a/xmlschema/validators/complex_types.py
+++ b/xmlschema/validators/complex_types.py
@@ -701,8 +701,10 @@ class Xsd11ComplexType(XsdComplexType):
                         self.parse_error("attribute %r must be inheritable")
 
         if 'defaultAttributesApply' in self.elem.attrib:
-            if self.elem.attrib['defaultAttributesApply'].strip() in {'false', '0'}:
-                self.default_attributes_apply = False
+            attr = self.elem.attrib['defaultAttributesApply'].strip()
+            self.default_attributes_apply = False if attr in {'false', '0'} else True
+        else:
+            self.default_attributes_apply = True
 
         # Add default attributes
         if self.redefine is None:
diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
index 2b7fe2e..7420670 100644
--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@@ -474,7 +474,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
         try:
             level = kwargs['level']
         except KeyError:
-            level = 0
+            level = kwargs['level'] = 0
 
         try:
             converter = kwargs['converter']
@@ -574,15 +574,12 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
                 xsd_type = xsd_type.content_type
 
             if text is None:
-                for result in xsd_type.iter_decode('', validation, _skip_id=True, **kwargs):
+                for result in xsd_type.iter_decode('', validation, **kwargs):
                     if isinstance(result, XMLSchemaValidationError):
                         yield self.validation_error(validation, result, elem, **kwargs)
                         if 'filler' in kwargs:
                             value = kwargs['filler'](self)
             else:
-                if level == 0 or self.xsd_version != '1.0':
-                    kwargs['_skip_id'] = True
-
                 for result in xsd_type.iter_decode(text, validation, **kwargs):
                     if isinstance(result, XMLSchemaValidationError):
                         yield self.validation_error(validation, result, elem, **kwargs)
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index e248c0c..2dfe9c9 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -697,7 +697,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
             yield element_data.content
             return
 
-        level = kwargs['level'] = kwargs.pop('level', 0) + 1
+        level = kwargs['level'] = kwargs.get('level', 0) + 1
         errors = []
         text = None
         children = []
diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py
index 62bed94..182015a 100644
--- a/xmlschema/validators/simple_types.py
+++ b/xmlschema/validators/simple_types.py
@@ -513,28 +513,25 @@ class XsdAtomicBuiltin(XsdAtomic):
             yield self.decode_error(validation, obj, self.to_python,
                                     reason="value is not an instance of {!r}".format(self.instance_types))
 
-        if self.name == XSD_ID:
-            try:
-                id_map = kwargs['id_map']
-            except KeyError:
-                pass
-            else:
-                try:
-                    id_map[obj] += 1
-                except TypeError:
-                    id_map[obj] = 1
-
-                if id_map[obj] > 1 and '_skip_id' not in kwargs:
-                    yield self.validation_error(validation, "Duplicated xsd:ID value {!r}".format(obj))
-
-        elif self.name == XSD_IDREF:
+        if self.name == XSD_IDREF:
             try:
                 id_map = kwargs['id_map']
             except KeyError:
                 pass
             else:
                 if obj not in id_map:
-                    id_map[obj] = kwargs.get('node', 0)
+                    id_map[obj] = 0
+
+        elif self.name == XSD_ID and kwargs.get('level') != 0:
+            try:
+                id_map = kwargs['id_map']
+            except KeyError:
+                pass
+            else:
+                if not id_map[obj]:
+                    id_map[obj] = 1
+                else:
+                    yield self.validation_error(validation, "Duplicated xsd:ID value {!r}".format(obj))
 
         if validation == 'skip':
             try:

From 2b1497860b1e339cef454eaed783fe212c38427d Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 30 Oct 2019 07:13:47 +0100
Subject: [PATCH 18/34] Fix 'all' model groups visiting

  - In 'all' groups, also move on to another element at each match
    (TODO: check if it's better to restart like choice groups)
  - In XSD 1.1 __iter__ now yields wildcards at the end for 'all'
    and 'choice' model groups
---
 xmlschema/tests/test_models.py | 56 +++++++++++++++++++++++++++++++++-
 xmlschema/validators/groups.py |  5 +++
 xmlschema/validators/models.py | 11 +++++--
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index 3748ead..e8e41c4 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -15,13 +15,15 @@ This module runs tests concerning model groups validation.
 import unittest
 
 from xmlschema import XMLSchema10, XMLSchema11
-from xmlschema.validators import ModelVisitor
+from xmlschema.validators import XsdElement, ModelVisitor
 from xmlschema.compat import ordered_dict_class
 from xmlschema.tests import casepath, XsdValidatorTestCase
 
 
 class TestModelValidation(XsdValidatorTestCase):
 
+    schema_class = XMLSchema10
+
     # --- Test helper functions ---
 
     def check_advance_true(self, model, expected=None):
@@ -514,6 +516,32 @@ class TestModelValidation(XsdValidatorTestCase):
         self.check_advance_true(model)                 # match choice with <elem4>
         self.assertIsNone(model.element)
 
+    def test_empty_choice_groups(self):
+        schema = self.schema_class("""<?xml version="1.0"?>
+        <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+            <xs:group name="group1">
+                <xs:sequence>
+                    <xs:choice minOccurs="0">
+                        <xs:choice minOccurs="0"/>
+                    </xs:choice>
+                    <xs:element name="elem1"/>
+                </xs:sequence>
+            </xs:group>
+            <xs:element name="root">
+                <xs:complexType>
+                    <xs:choice>
+                        <xs:group ref="group1"/>
+                    </xs:choice>
+                </xs:complexType>
+            </xs:element>
+        </xs:schema>""")
+
+        xml_data = "<root><elem1/></root>"
+        model = ModelVisitor(schema.elements['root'].type.content_type)
+        self.assertIsInstance(model.element, XsdElement)
+        self.assertEqual(model.element.name, 'elem1')
+        self.assertIsNone(schema.validate(xml_data))
+
     #
     # Tests on issues
     def test_issue_086(self):
@@ -576,6 +604,32 @@ class TestModelValidation(XsdValidatorTestCase):
 class TestModelValidation11(TestModelValidation):
     schema_class = XMLSchema11
 
+    def test_all_model_with_wildcard(self):
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:all>
+                            <xs:element name="a" type="xs:string" />
+                            <xs:any maxOccurs="3" processContents="lax" />
+                        </xs:all>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        xml_data = """
+            <root>
+              <wildcard1/>
+              <a>1</a>
+              <wildcard2/>
+              <wildcard3/>
+            </root>
+            """
+
+        self.assertIsNone(schema.validate(xml_data))
+
 
 class TestModelBasedSorting(XsdValidatorTestCase):
 
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index 2dfe9c9..738df4f 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -821,6 +821,11 @@ class Xsd11Group(XsdGroup):
           Content: (annotation?, (element | any | group)*)
         </all>
     """
+    def __iter__(self):
+        if self.model == 'sequence':
+            return iter(self._group)
+        return iter(sorted(self._group, key=lambda x: isinstance(x, XsdAnyElement)))
+
     def _parse_content_model(self, content_model):
         self.model = local_name(content_model.tag)
         if self.model == 'all':
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index 77c237f..46263a2 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -379,7 +379,12 @@ class ModelVisitor(MutableSequence):
     def _start(self):
         while True:
             item = next(self.items, None)
-            if item is None or not isinstance(item, ModelGroup):
+            if item is None:
+                if not self:
+                    break
+                else:
+                    self.group, self.items, self.match = self.pop()
+            elif not isinstance(item, ModelGroup):
                 self.element = item
                 break
             elif item:
@@ -464,7 +469,9 @@ class ModelVisitor(MutableSequence):
         if match:
             occurs[element] += 1
             self.match = True
-            if not element.is_over(occurs[element]):
+            if self.group.model == 'all':
+                pass
+            elif not element.is_over(occurs[element]):
                 return
 
         obj = None

From 4c624af6c91219a52865ab969436b2b1fed935cb Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 30 Oct 2019 18:05:41 +0100
Subject: [PATCH 19/34] Fix name matching and targetNamespace for XSD 1.1
 declarations

  - Use the targetNamespace of the referenced declaration
  - With a default namespace, also try to match the unqualified local name
---
 xmlschema/validators/attributes.py |  7 +++--
 xmlschema/validators/elements.py   | 45 ++++++++++++++++++++----------
 xmlschema/validators/wildcards.py  | 17 +++++++----
 xmlschema/validators/xsdbase.py    |  2 +-
 4 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py
index 04f2dc2..ecb203b 100644
--- a/xmlschema/validators/attributes.py
+++ b/xmlschema/validators/attributes.py
@@ -286,9 +286,12 @@ class Xsd11Attribute(XsdAttribute):
 
     @property
     def target_namespace(self):
-        if self._target_namespace is None:
+        if self._target_namespace is not None:
+            return self._target_namespace
+        elif self.ref is not None:
+            return self.ref.target_namespace
+        else:
             return self.schema.target_namespace
-        return self._target_namespace
 
     def _parse(self):
         super(Xsd11Attribute, self)._parse()
diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
index 7420670..ba5a8b0 100644
--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@@ -715,26 +715,38 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
 
     def is_matching(self, name, default_namespace=None, group=None):
         if default_namespace and name[0] != '{':
-            name = '{%s}%s' % (default_namespace, name)
-
-        if name in self.names:
-            return True
-
-        for xsd_element in self.iter_substitutes():
-            if name in xsd_element.names:
+            qname = '{%s}%s' % (default_namespace, name)
+            if name in self.names or qname in self.names:
                 return True
+
+            for xsd_element in self.iter_substitutes():
+                if name in xsd_element.names or qname in xsd_element.names:
+                    return True
+
+        elif name in self.names:
+            return True
+        else:
+            for xsd_element in self.iter_substitutes():
+                if name in xsd_element.names:
+                    return True
         return False
 
     def match(self, name, default_namespace=None, **kwargs):
         if default_namespace and name[0] != '{':
-            name = '{%s}%s' % (default_namespace, name)
+            qname = '{%s}%s' % (default_namespace, name)
+            if name in self.names or qname in self.names:
+                return self
 
-        if name in self.names:
+            for xsd_element in self.iter_substitutes():
+                if name in xsd_element.names or qname in xsd_element.names:
+                    return xsd_element
+
+        elif name in self.names:
             return self
-
-        for xsd_element in self.iter_substitutes():
-            if name in xsd_element.names:
-                return xsd_element
+        else:
+            for xsd_element in self.iter_substitutes():
+                if name in xsd_element.names:
+                    return xsd_element
 
     def is_restriction(self, other, check_occurs=True):
         if isinstance(other, XsdAnyElement):
@@ -905,9 +917,12 @@ class Xsd11Element(XsdElement):
 
     @property
     def target_namespace(self):
-        if self._target_namespace is None:
+        if self._target_namespace is not None:
+            return self._target_namespace
+        elif self.ref is not None:
+            return self.ref.target_namespace
+        else:
             return self.schema.target_namespace
-        return self._target_namespace
 
     def iter_components(self, xsd_classes=None):
         if xsd_classes is None:
diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py
index 849c22c..beb14b0 100644
--- a/xmlschema/validators/wildcards.py
+++ b/xmlschema/validators/wildcards.py
@@ -140,7 +140,8 @@ class XsdWildcard(XsdComponent, ValidationMixin):
         elif default_namespace is None:
             return self.is_namespace_allowed('')
         else:
-            return self.is_namespace_allowed(default_namespace)
+            return self.is_namespace_allowed('') or \
+                   self.is_namespace_allowed(default_namespace)
 
     def is_namespace_allowed(self, namespace):
         if self.not_namespace:
@@ -656,12 +657,15 @@ class Xsd11AnyElement(XsdAnyElement):
         if name is None:
             return False
         elif not name or name[0] == '{':
-            namespace = get_namespace(name)
-        elif default_namespace is None:
-            namespace = ''
+            if not self.is_namespace_allowed(get_namespace(name)):
+                return False
+        elif default_namespace is not None:
+            if not self.is_namespace_allowed(''):
+                return False
         else:
             name = '{%s}%s' % (default_namespace, name)
-            namespace = default_namespace
+            if not self.is_namespace_allowed('') and not self.is_namespace_allowed(default_namespace):
+                return False
 
         if group in self.precedences:
             if occurs is None:
@@ -676,7 +680,8 @@ class Xsd11AnyElement(XsdAnyElement):
             if any(e.is_matching(name) for e in group.iter_elements()
                    if not isinstance(e, XsdAnyElement)):
                 return False
-        return name not in self.not_qname and self.is_namespace_allowed(namespace)
+
+        return name not in self.not_qname
 
     def is_consistent(self, other):
         if isinstance(other, XsdAnyElement) or self.process_contents == 'skip':
diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py
index fe04ca0..aab0b89 100644
--- a/xmlschema/validators/xsdbase.py
+++ b/xmlschema/validators/xsdbase.py
@@ -299,7 +299,7 @@ class XsdComponent(XsdValidator):
     @property
     def target_namespace(self):
         """Property that references to schema's targetNamespace."""
-        return self.schema.target_namespace
+        return self.schema.target_namespace if self.ref is None else self.ref.target_namespace
 
     @property
     def default_namespace(self):

From b95d890f51a2f84646e713e78d23ab37a3cc6288 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 31 Oct 2019 07:29:09 +0100
Subject: [PATCH 20/34] Refine 'all' models visiting

  - Restart at every match with the items that are not yet exhausted
  - Do not check occurs on stop_item()
  - Do not reset the element counter on repeat
---
 xmlschema/tests/test_models.py | 21 +++++++++++++++++++++
 xmlschema/validators/models.py | 19 +++++++++----------
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index e8e41c4..d47c691 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -630,6 +630,27 @@ class TestModelValidation11(TestModelValidation):
 
         self.assertIsNone(schema.validate(xml_data))
 
+    def test_all_model_with_extended_occurs(self):
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:all>
+                            <xs:element name="a" minOccurs="0" maxOccurs="5"/>
+                            <xs:element name="b" maxOccurs="5"/>
+                            <xs:element name="c" minOccurs="2" maxOccurs="5"/>
+                            <xs:element name="d" />
+                        </xs:all>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        xml_data = '<root><a/><b/><d/><c/><a/><c/></root>'
+
+        self.assertIsNone(schema.validate(xml_data))
+
 
 class TestModelBasedSorting(XsdValidatorTestCase):
 
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index 46263a2..df921b0 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -440,7 +440,10 @@ class ModelVisitor(MutableSequence):
 
             item_occurs = occurs[item]
             model = self.group.model
-            if item_occurs:
+            if model == 'all':
+                return False
+
+            elif item_occurs:
                 self.match = True
                 if model == 'choice':
                     occurs[item] = 0
@@ -470,7 +473,7 @@ class ModelVisitor(MutableSequence):
             occurs[element] += 1
             self.match = True
             if self.group.model == 'all':
-                pass
+                self.items = (e for e in self.group if not e.is_over(occurs[e]))
             elif not element.is_over(occurs[element]):
                 return
 
@@ -487,8 +490,6 @@ class ModelVisitor(MutableSequence):
                 if obj is None:
                     if not self.match:
                         if self.group.model == 'all':
-                            for e in self.group:
-                                occurs[e] = occurs[(e,)]
                             if all(e.min_occurs <= occurs[e] for e in self.group):
                                 occurs[self.group] = 1
                         group, expected = self.group, self.expected
@@ -497,16 +498,14 @@ class ModelVisitor(MutableSequence):
                     elif self.group.model != 'all':
                         self.items, self.match = iter(self.group), False
                     elif any(not e.is_over(occurs[e]) for e in self.group):
-                        for e in self.group:
-                            occurs[(e,)] += occurs[e]
                         self.items, self.match = (e for e in self.group if not e.is_over(occurs[e])), False
                     else:
-                        for e in self.group:
-                            occurs[(e,)] += occurs[e]
                         occurs[self.group] = 1
 
                 elif not isinstance(obj, ModelGroup):  # XsdElement or XsdAnyElement
-                    self.element, occurs[obj] = obj, 0
+                    self.element = obj
+                    if self.group.model != 'all':
+                        occurs[obj] = 0
                     return
 
                 else:
@@ -515,7 +514,7 @@ class ModelVisitor(MutableSequence):
                     occurs[obj] = 0
                     if obj.model == 'all':
                         for e in obj:
-                            occurs[(e,)] = 0
+                            occurs[e] = 0
 
         except IndexError:
             # Model visit ended

From dd2ab7265467eadecf3c13c1b5850d8f9b35d74c Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Tue, 5 Nov 2019 11:09:34 +0100
Subject: [PATCH 21/34] Add iter_group() to ModelVisitor

---
 xmlschema/tests/test_models.py     | 45 ++++++++++++++++++++++++++++++
 xmlschema/validators/attributes.py |  3 +-
 xmlschema/validators/groups.py     |  7 +----
 xmlschema/validators/models.py     | 31 +++++++++++++++-----
 4 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index d47c691..17bb15f 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -651,6 +651,51 @@ class TestModelValidation11(TestModelValidation):
 
         self.assertIsNone(schema.validate(xml_data))
 
+    def test_all_model_with_relaxed_occurs(self):
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:all>
+                            <xs:element name="a" minOccurs="0" maxOccurs="5"/>
+                            <xs:element name="b" maxOccurs="5"/>
+                            <xs:element name="c" minOccurs="2" maxOccurs="unbounded"/>
+                            <xs:element name="d" />
+                        </xs:all>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        xml_data = '<root><a/><b/><d/><c/><a/><c/><c/><a/><a/><b/></root>'
+
+        self.assertIsNone(schema.validate(xml_data))
+
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:all>
+                            <xs:element name="a" minOccurs="0" maxOccurs="5"/>
+                            <xs:group ref="group1"/>
+                        </xs:all>
+                    </xs:complexType>
+                </xs:element>
+
+                <xs:group name="group1">
+                    <xs:all>
+                        <xs:element name="b" maxOccurs="5"/>
+                        <xs:element name="c" minOccurs="2" maxOccurs="unbounded"/>
+                        <xs:element name="d" />
+                    </xs:all>
+                </xs:group>
+            </xs:schema>
+            """)
+
+        self.assertIsNone(schema.validate(xml_data))
+
 
 class TestModelBasedSorting(XsdValidatorTestCase):
 
diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py
index ecb203b..051d69d 100644
--- a/xmlschema/validators/attributes.py
+++ b/xmlschema/validators/attributes.py
@@ -235,7 +235,8 @@ class XsdAttribute(XsdComponent, ValidationMixin):
             elif text == self.fixed or validation == 'skip':
                 pass
             elif self.type.text_decode(text) != self.type.text_decode(self.fixed):
-                yield self.validation_error(validation, "value differs from fixed value", text, **kwargs)
+                msg = "attribute {!r} has a fixed value {!r}".format(self.name, self.fixed)
+                yield self.validation_error(validation, msg, text, **kwargs)
 
         for result in self.type.iter_decode(text, validation, **kwargs):
             if isinstance(result, XMLSchemaValidationError):
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index 738df4f..c9ecc2e 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -821,11 +821,6 @@ class Xsd11Group(XsdGroup):
           Content: (annotation?, (element | any | group)*)
         </all>
     """
-    def __iter__(self):
-        if self.model == 'sequence':
-            return iter(self._group)
-        return iter(sorted(self._group, key=lambda x: isinstance(x, XsdAnyElement)))
-
     def _parse_content_model(self, content_model):
         self.model = local_name(content_model.tag)
         if self.model == 'all':
@@ -855,7 +850,7 @@ class Xsd11Group(XsdGroup):
                 if ref != self.name:
                     self.append(Xsd11Group(child, self.schema, self))
                     if (self.model != 'all') ^ (self[-1].model != 'all'):
-                        msg = "an xs:%s group cannot reference to an x:%s group"
+                        msg = "an xs:%s group cannot include a reference to an x:%s group"
                         self.parse_error(msg % (self.model, self[-1].model))
                         self.pop()
 
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index df921b0..fac02dc 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -338,7 +338,9 @@ class ModelVisitor(MutableSequence):
         self.occurs = Counter()
         self._subgroups = []
         self.element = None
-        self.group, self.items, self.match = root, iter(root), False
+        self.group = root
+        self.items = self.iter_group()
+        self.match = False
         self._start()
 
     def __str__(self):
@@ -374,7 +376,9 @@ class ModelVisitor(MutableSequence):
         del self._subgroups[:]
         self.occurs.clear()
         self.element = None
-        self.group, self.items, self.match = self.root, iter(self.root), False
+        self.group = self.root
+        self.items = self.iter_group()
+        self.match = False
 
     def _start(self):
         while True:
@@ -421,6 +425,18 @@ class ModelVisitor(MutableSequence):
             for e in self.advance():
                 yield e
 
+    def iter_group(self):
+        if self.group.model != 'all':
+            for item in self.group:
+                yield item
+        elif not self.occurs:
+            for e in self.group.iter_elements():
+                yield e
+        else:
+            for e in self.group.iter_elements():
+                if not e.is_over(self.occurs[e]):
+                    yield e
+
     def advance(self, match=False):
         """
         Generator function for advance to the next element. Yields tuples with
@@ -448,7 +464,7 @@ class ModelVisitor(MutableSequence):
                 if model == 'choice':
                     occurs[item] = 0
                     occurs[self.group] += 1
-                    self.items, self.match = iter(self.group), False
+                    self.items, self.match = self.iter_group(), False
                 elif model == 'sequence' and item is self.group[-1]:
                     self.occurs[self.group] += 1
                 return item.is_missing(item_occurs)
@@ -473,7 +489,7 @@ class ModelVisitor(MutableSequence):
             occurs[element] += 1
             self.match = True
             if self.group.model == 'all':
-                self.items = (e for e in self.group if not e.is_over(occurs[e]))
+                self.items = (e for e in self.group.iter_elements() if not e.is_over(occurs[e]))
             elif not element.is_over(occurs[element]):
                 return
 
@@ -490,15 +506,16 @@ class ModelVisitor(MutableSequence):
                 if obj is None:
                     if not self.match:
                         if self.group.model == 'all':
-                            if all(e.min_occurs <= occurs[e] for e in self.group):
+                            if all(e.min_occurs <= occurs[e] for e in self.group.iter_elements()):
                                 occurs[self.group] = 1
                         group, expected = self.group, self.expected
                         if stop_item(group) and expected:
                             yield group, occurs[group], expected
                     elif self.group.model != 'all':
-                        self.items, self.match = iter(self.group), False
+                        self.items, self.match = self.iter_group(), False
                     elif any(not e.is_over(occurs[e]) for e in self.group):
-                        self.items, self.match = (e for e in self.group if not e.is_over(occurs[e])), False
+                        self.items = self.iter_group()
+                        self.match = False
                     else:
                         occurs[self.group] = 1
 

From 896982222f0a9e26a8644db54e2c975cc31f5b8a Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 6 Nov 2019 09:49:00 +0100
Subject: [PATCH 22/34] Fix Windows paths normalization

---
 xmlschema/resources.py            |  2 ++
 xmlschema/tests/test_resources.py | 18 +++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 55ad1ab..b65af5d 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -73,6 +73,8 @@ def normalize_url(url, base_url=None, keep_relative=False):
         x = x.strip().replace('\\', '/')
         while x.startswith('//'):
             x = x.replace('//', '/', 1)
+        while x.startswith('file:////'):
+            x = x.replace('file:////', 'file:///', 1)
         if not urlsplit(x).scheme:
             x = x.replace('#', '%23')
         return x
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 447ddad..3aaebee 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -43,6 +43,15 @@ def add_leading_slash(path):
     return '/' + path if path and path[0] not in ('/', '\\') else path
 
 
+def filter_windows_path(path):
+    if path.startswith('/\\'):
+        return path[1:]
+    elif path and path[0] not in ('/', '\\'):
+        return '/' + path
+    else:
+        return path
+
+
 class TestResources(unittest.TestCase):
 
     @classmethod
@@ -68,14 +77,14 @@ class TestResources(unittest.TestCase):
         self.assertEqual(url_parts.fragment, expected_parts.fragment, "%r: Fragment parts differ." % url)
 
         if is_windows_path(url_parts.path) or is_windows_path(expected_parts.path):
-            path = PureWindowsPath(url_parts.path)
-            expected_path = PureWindowsPath(add_leading_slash(expected_parts.path))
+            path = PureWindowsPath(filter_windows_path(url_parts.path))
+            expected_path = PureWindowsPath(filter_windows_path(expected_parts.path))
         else:
             path = PurePath(url_parts.path)
             expected_path = PurePath(expected_parts.path)
         self.assertEqual(path, expected_path, "%r: Paths differ." % url)
 
-    def test_normalize_url(self):
+    def test_normalize_url_posix(self):
         url1 = "https://example.com/xsd/other_schema.xsd"
         self.check_url(normalize_url(url1, base_url="/path_my_schema/schema.xsd"), url1)
 
@@ -98,6 +107,7 @@ class TestResources(unittest.TestCase):
         self.check_url(normalize_url('dummy path.xsd', 'http://site/base'), 'http://site/base/dummy%20path.xsd')
         self.check_url(normalize_url('dummy path.xsd', 'file://host/home/'), 'file://host/home/dummy path.xsd')
 
+    def test_normalize_url_windows(self):
         win_abs_path1 = 'z:\\Dir_1_0\\Dir2-0\\schemas/XSD_1.0/XMLSchema.xsd'
         win_abs_path2 = 'z:\\Dir-1.0\\Dir-2_0\\'
         self.check_url(normalize_url(win_abs_path1), win_abs_path1)
@@ -108,7 +118,9 @@ class TestResources(unittest.TestCase):
         self.check_url(
             normalize_url('xsd1.0/schema.xsd', win_abs_path2), 'file:///z:\\Dir-1.0\\Dir-2_0/xsd1.0/schema.xsd'
         )
+        self.check_url(normalize_url('file:///\\k:\\Dir A\\schema.xsd'), 'file:///k:\\Dir A\\schema.xsd')
 
+    def test_normalize_url_issue_116(self):
         # Issue #116
         self.assertEqual(
             normalize_url('//anaconda/envs/testenv/lib/python3.6/site-packages/xmlschema/validators/schemas/'),

From 24a08c4442798dbefc0bb8e89a0ffd4a72e05b2b Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 6 Nov 2019 10:22:09 +0100
Subject: [PATCH 23/34] Replace backslashes in the normalize_url() result

---
 xmlschema/resources.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index b65af5d..2f9bea1 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -113,9 +113,9 @@ def normalize_url(url, base_url=None, keep_relative=False):
 
     url_parts = urlsplit(url, scheme='file')
     if url_parts.scheme not in uses_relative:
-        return 'file:///{}'.format(url_parts.geturl())  # Eg. k:/Python/lib/....
+        normalized_url = 'file:///{}'.format(url_parts.geturl())  # Eg. k:/Python/lib/....
     elif url_parts.scheme != 'file':
-        return urlunsplit((
+        normalized_url = urlunsplit((
             url_parts.scheme,
             url_parts.netloc,
             pathname2url(url_parts.path),
@@ -123,18 +123,20 @@ def normalize_url(url, base_url=None, keep_relative=False):
             url_parts.fragment,
         ))
     elif os.path.isabs(url_parts.path):
-        return url_parts.geturl()
+        normalized_url = url_parts.geturl()
     elif keep_relative:
         # Can't use urlunsplit with a scheme because it converts relative paths to absolute ones.
-        return 'file:{}'.format(urlunsplit(('',) + url_parts[1:]))
+        normalized_url = 'file:{}'.format(urlunsplit(('',) + url_parts[1:]))
     else:
-        return urlunsplit((
+        normalized_url = urlunsplit((
             url_parts.scheme,
             url_parts.netloc,
             os.path.abspath(url_parts.path),
             url_parts.query,
             url_parts.fragment,
         ))
+    
+    return normalized_url.replace('\\', '/')
 
 
 def fetch_resource(location, base_url=None, timeout=30):

From dc82f0487428116d2ef9989401315e719ff09c6c Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 6 Nov 2019 10:39:51 +0100
Subject: [PATCH 24/34] Filter normalize_url result

---
 xmlschema/resources.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 2f9bea1..b072ab2 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -75,7 +75,7 @@ def normalize_url(url, base_url=None, keep_relative=False):
             x = x.replace('//', '/', 1)
         while x.startswith('file:////'):
             x = x.replace('file:////', 'file:///', 1)
-        if not urlsplit(x).scheme:
+        if urlsplit(x).scheme in {'', 'file'}:
             x = x.replace('#', '%23')
         return x
 
@@ -136,7 +136,7 @@ def normalize_url(url, base_url=None, keep_relative=False):
             url_parts.fragment,
         ))
     
-    return normalized_url.replace('\\', '/')
+    return filter_url(normalized_url)
 
 
 def fetch_resource(location, base_url=None, timeout=30):

From b8ccfac6f104e9389684133000a1d91e459d3fc6 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 6 Nov 2019 11:40:51 +0100
Subject: [PATCH 25/34] Update test_resources to avoid using an inapplicable
 'file' scheme

---
 xmlschema/tests/test_resources.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 3aaebee..1534393 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -120,7 +120,7 @@ class TestResources(unittest.TestCase):
         )
         self.check_url(normalize_url('file:///\\k:\\Dir A\\schema.xsd'), 'file:///k:\\Dir A\\schema.xsd')
 
-    def test_normalize_url_issue_116(self):
+    def test_normalize_url_slashes(self):
         # Issue #116
         self.assertEqual(
             normalize_url('//anaconda/envs/testenv/lib/python3.6/site-packages/xmlschema/validators/schemas/'),
@@ -134,12 +134,13 @@ class TestResources(unittest.TestCase):
         self.assertEqual(normalize_url('dir2/schema.xsd', '//root/dir1'), 'file:///root/dir1/dir2/schema.xsd')
         self.assertEqual(normalize_url('dir2/schema.xsd', '////root/dir1'), 'file:///root/dir1/dir2/schema.xsd')
 
-        self.check_url(normalize_url('issue #000.xml', 'file://host/home/'),
-                       'file://host/home/issue %23000.xml')
-        self.check_url(normalize_url('data.xml', 'file://host/home/issue 000'),
-                       'file://host/home/issue 000/data.xml')
-        self.check_url(normalize_url('data.xml', '/host/home/issue #000'),
-                       '/host/home/issue %23000/data.xml')
+    def test_normalize_url_hash_character(self):
+        self.check_url(normalize_url('issue #000.xml', 'file:///dir1/dir2/'),
+                       'file:///dir1/dir2/issue %23000.xml')
+        self.check_url(normalize_url('data.xml', 'file:///dir1/dir2/issue 000'),
+                       'file:///dir1/dir2/issue 000/data.xml')
+        self.check_url(normalize_url('data.xml', '/dir1/dir2/issue #000'),
+                       '/dir1/dir2/issue %23000/data.xml')
 
     def test_fetch_resource(self):
         wrong_path = casepath('resources/dummy_file.txt')
@@ -456,11 +457,11 @@ class TestResources(unittest.TestCase):
         xml_file = resource.open()
         self.assertTrue(callable(xml_file.read))
 
-        xml_file = open(self.vh_xml_file)
-        resource = XMLResource(source=xml_file)
-        resource.close()
-        with self.assertRaises(ValueError):
-            resource.open()
+        with open(self.vh_xml_file) as xml_file:
+            resource = XMLResource(source=xml_file)
+            resource.close()
+            with self.assertRaises(ValueError):
+                resource.open()
 
     def test_xml_resource_iter(self):
         resource = XMLResource(self.schema_class.meta_schema.source.url, lazy=False)

From d0f3a0f6c8b10d9bcb132596a2343d055e853ae6 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Wed, 6 Nov 2019 11:49:09 +0100
Subject: [PATCH 26/34] Skip ElementTree import test with external process on
 Windows platform

---
 xmlschema/tests/test_etree.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/xmlschema/tests/test_etree.py b/xmlschema/tests/test_etree.py
index e039181..22e42a9 100644
--- a/xmlschema/tests/test_etree.py
+++ b/xmlschema/tests/test_etree.py
@@ -15,6 +15,7 @@ import os
 import importlib
 import sys
 import subprocess
+import platform
 
 
 @unittest.skipIf(sys.version_info < (3,), "In Python 2 ElementTree is not overwritten by cElementTree")
@@ -51,6 +52,7 @@ class TestElementTree(unittest.TestCase):
         self.assertIs(importlib.import_module('xml.etree.ElementTree'), ElementTree)
         self.assertIs(xmlschema_etree.ElementTree, ElementTree)
 
+    @unittest.skipIf(platform.system() == 'Windows', "Run only for UNIX based systems.")
     def test_element_tree_import_script(self):
         test_dir = os.path.dirname(__file__) or '.'
 

From 2bcf78549ccea9a7a7e8aefbd1cc72d02d6ca58e Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Fri, 8 Nov 2019 07:17:29 +0100
Subject: [PATCH 27/34] Add count_occurs() to ModelGroup

---
 xmlschema/resources.py            |  1 -
 xmlschema/tests/test_models.py    | 18 +++++++++++++++++
 xmlschema/validators/groups.py    | 10 ++++++----
 xmlschema/validators/models.py    | 33 ++++++++++++++++++++++++++++---
 xmlschema/validators/wildcards.py |  2 +-
 xmlschema/validators/xsdbase.py   |  7 +++++++
 6 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index b072ab2..adb9c02 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -135,7 +135,6 @@ def normalize_url(url, base_url=None, keep_relative=False):
             url_parts.query,
             url_parts.fragment,
         ))
-    
     return filter_url(normalized_url)
 
 
diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index 17bb15f..4f101c9 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -542,6 +542,24 @@ class TestModelValidation(XsdValidatorTestCase):
         self.assertEqual(model.element.name, 'elem1')
         self.assertIsNone(schema.validate(xml_data))
 
+    def test_sequence_model_with_extended_occurs(self):
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:sequence minOccurs="2" maxOccurs="unbounded">
+                            <xs:element name="ax" maxOccurs="unbounded"/>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        xml_data = '<root><ax/><ax/></root>'
+
+        self.assertIsNone(schema.validate(xml_data))
+
     #
     # Tests on issues
     def test_issue_086(self):
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index c9ecc2e..2684135 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -988,16 +988,18 @@ class Xsd11Group(XsdGroup):
             for item in restriction_items:
                 if other_item is item or item.is_restriction(other_item, check_occurs):
                     if max_occurs is not None:
-                        if item.effective_max_occurs is None:
+                        effective_max_occurs = item.effective_max_occurs
+                        if effective_max_occurs is None:
                             max_occurs = None
                         else:
-                            max_occurs = counter_func(max_occurs, item.effective_max_occurs)
+                            max_occurs = counter_func(max_occurs, effective_max_occurs)
 
                     if other_max_occurs is not None:
-                        if other_item.effective_max_occurs is None:
+                        effective_max_occurs = other_item.effective_max_occurs
+                        if effective_max_occurs is None:
                             other_max_occurs = None
                         else:
-                            other_max_occurs = max(other_max_occurs, other_item.effective_max_occurs)
+                            other_max_occurs = max(other_max_occurs, effective_max_occurs)
                     break
             else:
                 continue
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index fac02dc..fc4b9af 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -156,6 +156,33 @@ class ModelGroup(MutableSequence, ParticleMixin):
             else:
                 return self.max_occurs * sum(e.max_occurs for e in self) <= other.max_occurs
 
+    def count_occurs(self, occurs):
+        """
+        Calculates the current model group occurrences from the occurs of its items.
+        """
+        group_occurs = None
+        if self.model == 'sequence':
+            for item in filter(lambda x: occurs[x], self):
+                if group_occurs is not None:
+                    return 1
+                group_occurs = item.min_occurs_reps(occurs)
+
+        elif self.model == 'choice':
+            for item in filter(lambda x: occurs[x], self):
+                group_occurs = item.min_occurs_reps(occurs)
+                break
+
+        else:
+            for item in filter(lambda x: occurs[x], self):
+                group_occurs = min(1, item.min_occurs_reps(occurs))
+
+        if group_occurs is None:
+            return 0
+        elif self.is_over(group_occurs):
+            return self.max_occurs
+        else:
+            return group_occurs
+
     def iter_model(self, depth=0):
         """
         A generator function iterating elements and groups of a model group. Skips pointless groups,
@@ -462,17 +489,17 @@ class ModelVisitor(MutableSequence):
             elif item_occurs:
                 self.match = True
                 if model == 'choice':
+                    occurs[self.group] += max(1, self.group.count_occurs(self.occurs))
                     occurs[item] = 0
-                    occurs[self.group] += 1
                     self.items, self.match = self.iter_group(), False
                 elif model == 'sequence' and item is self.group[-1]:
-                    self.occurs[self.group] += 1
+                    self.occurs[self.group] += max(1, self.group.count_occurs(self.occurs))
                 return item.is_missing(item_occurs)
 
             elif model == 'sequence':
                 if self.match:
                     if item is self.group[-1]:
-                        occurs[self.group] += 1
+                        occurs[self.group] += max(1, self.group.count_occurs(self.occurs))
                     return not item.is_emptiable()
                 elif item.is_emptiable():
                     return False
diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py
index beb14b0..fe2e448 100644
--- a/xmlschema/validators/wildcards.py
+++ b/xmlschema/validators/wildcards.py
@@ -141,7 +141,7 @@ class XsdWildcard(XsdComponent, ValidationMixin):
             return self.is_namespace_allowed('')
         else:
             return self.is_namespace_allowed('') or \
-                   self.is_namespace_allowed(default_namespace)
+                self.is_namespace_allowed(default_namespace)
 
     def is_namespace_allowed(self, namespace):
         if self.not_namespace:
diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py
index aab0b89..65ae512 100644
--- a/xmlschema/validators/xsdbase.py
+++ b/xmlschema/validators/xsdbase.py
@@ -958,6 +958,13 @@ class ParticleMixin(object):
     def is_over(self, occurs):
         return self.max_occurs is not None and self.max_occurs <= occurs
 
+    def min_occurs_reps(self, occurs):
+        """Returns the repetitions of minimum occurrences."""
+        if not self.min_occurs:
+            return occurs[self]
+        else:
+            return occurs[self] // self.min_occurs
+
     def has_occurs_restriction(self, other):
         if self.min_occurs == self.max_occurs == 0:
             return True

From 7c4cd8b4d3bed9d924ec3c81c5710edf056631fa Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Fri, 8 Nov 2019 16:40:02 +0100
Subject: [PATCH 28/34] Change stop_item() in ModelVisitor.advance()

  - Removed ParticleMixin.min_occurs_reps()
  - Removed ModelGroup.count_occurs()
---
 xmlschema/tests/test_models.py     | 26 ++++++++--
 xmlschema/validators/exceptions.py |  2 +-
 xmlschema/validators/models.py     | 79 ++++++++++++++----------------
 xmlschema/validators/xsdbase.py    |  7 ---
 4 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index 4f101c9..df19ae1 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -516,6 +516,8 @@ class TestModelValidation(XsdValidatorTestCase):
         self.check_advance_true(model)                 # match choice with <elem4>
         self.assertIsNone(model.element)
 
+    #
+    # Test pathological cases
     def test_empty_choice_groups(self):
         schema = self.schema_class("""<?xml version="1.0"?>
         <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
@@ -549,17 +551,35 @@ class TestModelValidation(XsdValidatorTestCase):
                 <xs:element name="root">
                     <xs:complexType>
                         <xs:sequence minOccurs="2" maxOccurs="unbounded">
-                            <xs:element name="ax" maxOccurs="unbounded"/>
+                            <xs:element name="a" maxOccurs="unbounded"/>
                         </xs:sequence>
                     </xs:complexType>
                 </xs:element>
             </xs:schema>
             """)
 
-        xml_data = '<root><ax/><ax/></root>'
-
+        xml_data = '<root><a/><a/></root>'
         self.assertIsNone(schema.validate(xml_data))
 
+    def test_choice_model_with_extended_occurs(self):
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:choice maxOccurs="unbounded" minOccurs="0">
+                            <xs:element maxOccurs="5" minOccurs="3" name="ax"/>
+                            <xs:element maxOccurs="5" minOccurs="3" name="bx"/>
+                        </xs:choice>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        self.assertIsNone(schema.validate('<root><ax/><ax/><ax/></root>'))
+        self.assertIsNone(schema.validate('<root><ax/><ax/><ax/><ax/><ax/></root>'))
+        self.assertIsNone(schema.validate('<root><ax/><ax/><ax/><ax/><ax/><ax/></root>'))
+
     #
     # Tests on issues
     def test_issue_086(self):
diff --git a/xmlschema/validators/exceptions.py b/xmlschema/validators/exceptions.py
index d47d60a..b766ac4 100644
--- a/xmlschema/validators/exceptions.py
+++ b/xmlschema/validators/exceptions.py
@@ -346,7 +346,7 @@ class XMLSchemaChildrenValidationError(XMLSchemaValidationError):
             if not expected_tags:
                 pass  # reason += " No child element is expected at this point." <-- this can be misleading
             elif len(expected_tags) == 1:
-                reason += " Tag %s expected." % expected_tags[0]
+                reason += " Tag %r expected." % expected_tags[0]
             else:
                 reason += " Tag (%s) expected." % ' | '.join(expected_tags)
 
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index fc4b9af..c63ca14 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -156,33 +156,6 @@ class ModelGroup(MutableSequence, ParticleMixin):
             else:
                 return self.max_occurs * sum(e.max_occurs for e in self) <= other.max_occurs
 
-    def count_occurs(self, occurs):
-        """
-        Calculates the current model group occurrences from the occurs of its items.
-        """
-        group_occurs = None
-        if self.model == 'sequence':
-            for item in filter(lambda x: occurs[x], self):
-                if group_occurs is not None:
-                    return 1
-                group_occurs = item.min_occurs_reps(occurs)
-
-        elif self.model == 'choice':
-            for item in filter(lambda x: occurs[x], self):
-                group_occurs = item.min_occurs_reps(occurs)
-                break
-
-        else:
-            for item in filter(lambda x: occurs[x], self):
-                group_occurs = min(1, item.min_occurs_reps(occurs))
-
-        if group_occurs is None:
-            return 0
-        elif self.is_over(group_occurs):
-            return self.max_occurs
-        else:
-            return group_occurs
-
     def iter_model(self, depth=0):
         """
         A generator function iterating elements and groups of a model group. Skips pointless groups,
@@ -486,27 +459,47 @@ class ModelVisitor(MutableSequence):
             if model == 'all':
                 return False
 
-            elif item_occurs:
+            elif model == 'choice':
+                if not item_occurs:
+                    return False
+
                 self.match = True
-                if model == 'choice':
-                    occurs[self.group] += max(1, self.group.count_occurs(self.occurs))
+
+                group_occurs = min(1, occurs[item] // (item.min_occurs or 1))
+                if self.group.is_over(group_occurs):
+                    group_occurs = self.group.max_occurs
+                occurs[self.group] += group_occurs
+
+                if group_occurs == 1:
                     occurs[item] = 0
-                    self.items, self.match = self.iter_group(), False
-                elif model == 'sequence' and item is self.group[-1]:
-                    self.occurs[self.group] += max(1, self.group.count_occurs(self.occurs))
+                else:
+                    item_occurs %= item.min_occurs
+                    occurs[item] = item_occurs
+
+                self.items, self.match = self.iter_group(), False
                 return item.is_missing(item_occurs)
 
-            elif model == 'sequence':
-                if self.match:
-                    if item is self.group[-1]:
-                        occurs[self.group] += max(1, self.group.count_occurs(self.occurs))
-                    return not item.is_emptiable()
-                elif item.is_emptiable():
-                    return False
-                elif self.group.min_occurs <= occurs[self.group] or self:
-                    return stop_item(self.group)
+            elif item_occurs:
+                self.match = True
+            elif self.match:
+                pass
+            elif item.is_emptiable():
+                return False
+            elif self.group.min_occurs <= occurs[self.group] or self:
+                return stop_item(self.group)
+            else:
+                return True
+
+            if item is self.group[-1]:
+                if any(occurs[x] for x in self if x is not item):
+                    group_occurs = 1
                 else:
-                    return True
+                    group_occurs = max(1, occurs[item] // (item.min_occurs or 1))
+                    if self.group.is_over(group_occurs):
+                        group_occurs = self.group.max_occurs
+                self.occurs[self.group] += max(1, group_occurs)
+
+            return item.is_missing(item_occurs)
 
         element, occurs = self.element, self.occurs
         if element is None:
diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py
index 65ae512..aab0b89 100644
--- a/xmlschema/validators/xsdbase.py
+++ b/xmlschema/validators/xsdbase.py
@@ -958,13 +958,6 @@ class ParticleMixin(object):
     def is_over(self, occurs):
         return self.max_occurs is not None and self.max_occurs <= occurs
 
-    def min_occurs_reps(self, occurs):
-        """Returns the repetitions of minimum occurrences."""
-        if not self.min_occurs:
-            return occurs[self]
-        else:
-            return occurs[self] // self.min_occurs
-
     def has_occurs_restriction(self, other):
         if self.min_occurs == self.max_occurs == 0:
             return True

From 79cf89af86efb95ef1c9db747f9194a45c735bc3 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 14 Nov 2019 11:14:51 +0100
Subject: [PATCH 29/34] Fix ambiguous choice models visiting

  - Fixed registering max occurs for tuple (group,)
  - TODO: maybe the same solution for 1-length sequence groups
---
 xmlschema/tests/test_models.py |  71 ++++++++++++++++++--
 xmlschema/validators/models.py | 114 ++++++++++++++++++---------------
 2 files changed, 125 insertions(+), 60 deletions(-)

diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index df19ae1..b671cd6 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -558,8 +558,48 @@ class TestModelValidation(XsdValidatorTestCase):
             </xs:schema>
             """)
 
-        xml_data = '<root><a/><a/></root>'
-        self.assertIsNone(schema.validate(xml_data))
+        self.assertIsNone(schema.validate('<root><a/><a/></root>'))
+
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+                            <xs:element name="a" minOccurs="2" maxOccurs="unbounded"/>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        self.assertIsNone(schema.validate('<root><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/><a/></root>'))
+
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+                            <xs:group ref="group1" minOccurs="2" maxOccurs="unbounded"/>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
+                <xs:group name="group1">
+                    <xs:choice>
+                        <xs:element name="ax" maxOccurs="unbounded"/>
+                        <xs:element name="b"/>
+                        <xs:element name="c"/>
+                    </xs:choice>
+                </xs:group>
+            </xs:schema>
+            """)
+
+        self.assertIsNone(schema.validate('<root><ax/><ax/></root>'))
+        # self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
+        # self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/><a/></root>'))
 
     def test_choice_model_with_extended_occurs(self):
         schema = self.schema_class(
@@ -568,17 +608,34 @@ class TestModelValidation(XsdValidatorTestCase):
                 <xs:element name="root">
                     <xs:complexType>
                         <xs:choice maxOccurs="unbounded" minOccurs="0">
-                            <xs:element maxOccurs="5" minOccurs="3" name="ax"/>
-                            <xs:element maxOccurs="5" minOccurs="3" name="bx"/>
+                            <xs:element maxOccurs="5" minOccurs="3" name="a"/>
+                            <xs:element maxOccurs="5" minOccurs="3" name="b"/>
                         </xs:choice>
                     </xs:complexType>
                 </xs:element>
             </xs:schema>
             """)
 
-        self.assertIsNone(schema.validate('<root><ax/><ax/><ax/></root>'))
-        self.assertIsNone(schema.validate('<root><ax/><ax/><ax/><ax/><ax/></root>'))
-        self.assertIsNone(schema.validate('<root><ax/><ax/><ax/><ax/><ax/><ax/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/><a/></root>'))
+
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                    <xs:choice minOccurs="2" maxOccurs="3">
+                        <xs:element name="a" maxOccurs="unbounded"/>
+                        <xs:element name="b" maxOccurs="unbounded"/>
+                        <xs:element name="c"/>
+                    </xs:choice>
+                    </xs:complexType>
+                </xs:element>
+            </xs:schema>
+            """)
+
+        self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
 
     #
     # Tests on issues
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index c63ca14..eb79aec 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -393,7 +393,9 @@ class ModelVisitor(MutableSequence):
                 break
             elif item:
                 self.append((self.group, self.items, self.match))
-                self.group, self.items, self.match = item, iter(item), False
+                self.group = item
+                self.items = self.iter_group()
+                self.match = False
 
     @property
     def expected(self):
@@ -426,16 +428,13 @@ class ModelVisitor(MutableSequence):
                 yield e
 
     def iter_group(self):
+        """Returns an iterator for the current model group."""
         if self.group.model != 'all':
-            for item in self.group:
-                yield item
+            return iter(self.group)
         elif not self.occurs:
-            for e in self.group.iter_elements():
-                yield e
+            return self.group.iter_elements()
         else:
-            for e in self.group.iter_elements():
-                if not e.is_over(self.occurs[e]):
-                    yield e
+            return (e for e in self.group.iter_elements() if not e.is_over(self.occurs[e]))
 
     def advance(self, match=False):
         """
@@ -444,6 +443,17 @@ class ModelVisitor(MutableSequence):
 
         :param match: provides current element match.
         """
+        def get_choices(self, occurs):
+            max_group_occurs = max(1, occurs // (self.min_occurs or 1))
+            if self.max_occurs is None:
+                return [x for x in range(1, max_group_occurs + 1)]
+            else:
+                delta_occurs = self.max_occurs - self.min_occurs + 1
+                if occurs % max_group_occurs > delta_occurs:
+                    return []
+                else:
+                    return [x for x in range(1, max_group_occurs + 1)]
+
         def stop_item(item):
             """
             Stops element or group matching, incrementing current group counter.
@@ -455,30 +465,24 @@ class ModelVisitor(MutableSequence):
                 self.group, self.items, self.match = self.pop()
 
             item_occurs = occurs[item]
-            model = self.group.model
-            if model == 'all':
-                return False
-
-            elif model == 'choice':
+            if self.group.model == 'choice':
                 if not item_occurs:
                     return False
 
-                self.match = True
+                item_max_occurs = occurs[(item,)] or item_occurs
+                min_group_occurs = max(1, item_occurs // (item.max_occurs or item_occurs))
+                max_group_occurs = max(1, item_max_occurs // (item.min_occurs or 1))
 
-                group_occurs = min(1, occurs[item] // (item.min_occurs or 1))
-                if self.group.is_over(group_occurs):
-                    group_occurs = self.group.max_occurs
-                occurs[self.group] += group_occurs
+                occurs[self.group] += min_group_occurs
+                occurs[(self.group,)] += max_group_occurs
+                occurs[item] = 0
 
-                if group_occurs == 1:
-                    occurs[item] = 0
-                else:
-                    item_occurs %= item.min_occurs
-                    occurs[item] = item_occurs
-
-                self.items, self.match = self.iter_group(), False
-                return item.is_missing(item_occurs)
+                self.items = self.iter_group()
+                self.match = False
+                return item.is_missing(max(item_occurs, occurs[(item,)]))
 
+            elif self.group.model == 'all':
+                return False
             elif item_occurs:
                 self.match = True
             elif self.match:
@@ -494,12 +498,11 @@ class ModelVisitor(MutableSequence):
                 if any(occurs[x] for x in self if x is not item):
                     group_occurs = 1
                 else:
-                    group_occurs = max(1, occurs[item] // (item.min_occurs or 1))
+                    group_occurs = max(1, item_occurs // (item.min_occurs or 1))
                     if self.group.is_over(group_occurs):
                         group_occurs = self.group.max_occurs
                 self.occurs[self.group] += max(1, group_occurs)
-
-            return item.is_missing(item_occurs)
+            return item.is_missing(max(item_occurs, occurs[(item,)]))
 
         element, occurs = self.element, self.occurs
         if element is None:
@@ -510,6 +513,9 @@ class ModelVisitor(MutableSequence):
             self.match = True
             if self.group.model == 'all':
                 self.items = (e for e in self.group.iter_elements() if not e.is_over(occurs[e]))
+            elif self.group.model == 'choice':  # or len(self.group) == 1:
+                if not element.is_over(occurs[element]) or element.is_ambiguous():
+                    return
             elif not element.is_over(occurs[element]):
                 return
 
@@ -523,40 +529,42 @@ class ModelVisitor(MutableSequence):
                     stop_item(self.group)
 
                 obj = next(self.items, None)
-                if obj is None:
-                    if not self.match:
-                        if self.group.model == 'all':
-                            if all(e.min_occurs <= occurs[e] for e in self.group.iter_elements()):
-                                occurs[self.group] = 1
-                        group, expected = self.group, self.expected
-                        if stop_item(group) and expected:
-                            yield group, occurs[group], expected
-                    elif self.group.model != 'all':
-                        self.items, self.match = self.iter_group(), False
-                    elif any(not e.is_over(occurs[e]) for e in self.group):
-                        self.items = self.iter_group()
-                        self.match = False
-                    else:
-                        occurs[self.group] = 1
+                if isinstance(obj, ModelGroup):
+                    # inner 'sequence' or 'choice' XsdGroup
+                    self.append((self.group, self.items, self.match))
+                    self.group = obj
+                    self.items = self.iter_group()
+                    self.match = False
+                    occurs[obj] = 0
 
-                elif not isinstance(obj, ModelGroup):  # XsdElement or XsdAnyElement
+                elif obj is not None:
+                    # XsdElement or XsdAnyElement
                     self.element = obj
-                    if self.group.model != 'all':
+                    if self.group.model == 'sequence':
                         occurs[obj] = 0
                     return
 
+                elif not self.match:
+                    if self.group.model == 'all':
+                        if all(e.min_occurs <= occurs[e] for e in self.group.iter_elements()):
+                            occurs[self.group] = 1
+
+                    group, expected = self.group, self.expected
+                    if stop_item(group) and expected:
+                        yield group, occurs[group], expected
+
+                elif self.group.model != 'all':
+                    self.items, self.match = self.iter_group(), False
+                elif any(not e.is_over(occurs[e]) for e in self.group):
+                    self.items = self.iter_group()
+                    self.match = False
                 else:
-                    self.append((self.group, self.items, self.match))
-                    self.group, self.items, self.match = obj, iter(obj), False
-                    occurs[obj] = 0
-                    if obj.model == 'all':
-                        for e in obj:
-                            occurs[e] = 0
+                    occurs[self.group] = 1
 
         except IndexError:
             # Model visit ended
             self.element = None
-            if self.group.is_missing(occurs[self.group]):
+            if self.group.is_missing(max(occurs[self.group], occurs[(self.group,)])):
                 if self.group.model == 'choice':
                     yield self.group, occurs[self.group], self.expected
                 elif self.group.model == 'sequence':

From 4b7b16a750052e1bc23fc13744e784521229dfdf Mon Sep 17 00:00:00 2001
From: John Vandenberg <jayvdb@gmail.com>
Date: Thu, 14 Nov 2019 23:01:40 +0700
Subject: [PATCH 30/34] setup.py: Add setup-requires

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 603a73e..fd93f00 100755
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,7 @@ class InstallCommand(install):
 setup(
     name='xmlschema',
     version='1.0.15',
+    setup_requires=['elementpath~=1.3.0'],
     install_requires=['elementpath~=1.3.0'],
     packages=['xmlschema'],
     include_package_data=True,

From fc3141283de7ff5409885b4e9fea12456520689a Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Thu, 14 Nov 2019 11:20:26 +0100
Subject: [PATCH 31/34] Fix some W3C failed tests

  - fix inherited attrs composition in XSD elements
  - check that no more than one attribute of type ID is present when
    validating an element's attributes (XSD 1.0)
---
 xmlschema/validators/attributes.py    | 6 ++++++
 xmlschema/validators/complex_types.py | 2 +-
 xmlschema/validators/elements.py      | 9 ++++-----
 xmlschema/validators/groups.py        | 2 +-
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py
index 051d69d..cbdc1e9 100644
--- a/xmlschema/validators/attributes.py
+++ b/xmlschema/validators/attributes.py
@@ -600,6 +600,8 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin):
 
         kwargs['level'] = kwargs.get('level', 0) + 1
         use_defaults = kwargs.get('use_defaults', True)
+        id_map = kwargs.get('id_map', '')
+        num_id = len(id_map)
 
         additional_attrs = [(k, v) for k, v in self.iter_predefined(use_defaults) if k not in attrs]
         if additional_attrs:
@@ -644,6 +646,10 @@ class XsdAttributeGroup(MutableMapping, XsdComponent, ValidationMixin):
                     result_list.append((name, result))
                     break
 
+        if self.xsd_version == '1.0' and len(id_map) - num_id > 1:
+            reason = "No more than one attribute of type ID should be present in an element"
+            yield self.validation_error(validation, reason, attrs, **kwargs)
+
         if kwargs.get('fill_missing') is True:
             if filler is None:
                 result_list.extend((k, None) for k in self._attribute_group
diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py
index 86ef0cc..4010445 100644
--- a/xmlschema/validators/complex_types.py
+++ b/xmlschema/validators/complex_types.py
@@ -502,7 +502,7 @@ class XsdComplexType(XsdType, ValidationMixin):
         elif other.name == XSD_ANY_TYPE:
             return True
         elif self.base_type is other:
-            return derivation is None or self.base_type.derivation == derivation
+            return derivation is None  # or self.base_type.derivation == derivation
         elif hasattr(other, 'member_types'):
             return any(self.is_derived(m, derivation) for m in other.member_types)
         elif self.base_type is None:
diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
index ba5a8b0..4b2d3c7 100644
--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@@ -21,7 +21,7 @@ from ..exceptions import XMLSchemaAttributeError
 from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_SEQUENCE, XSD_ALL, \
     XSD_CHOICE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, \
     XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, XSD_KEY, \
-    XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ID, XSD_ERROR, get_qname
+    XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ERROR, get_qname
 from ..etree import etree_element
 from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute, \
     ParticleCounter, strictly_equal
@@ -244,15 +244,13 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
             if not self.type.is_valid(attrib['default']):
                 msg = "'default' value {!r} is not compatible with the type {!r}"
                 self.parse_error(msg.format(attrib['default'], self.type))
-            elif self.xsd_version == '1.0' and (
-                    self.type.name == XSD_ID or self.type.is_derived(self.schema.meta_schema.types['ID'])):
+            elif self.xsd_version == '1.0' and self.type.is_key():
                 self.parse_error("'xs:ID' or a type derived from 'xs:ID' cannot has a 'default'")
         elif 'fixed' in attrib:
             if not self.type.is_valid(attrib['fixed']):
                 msg = "'fixed' value {!r} is not compatible with the type {!r}"
                 self.parse_error(msg.format(attrib['fixed'], self.type))
-            elif self.xsd_version == '1.0' and (
-                    self.type.name == XSD_ID or self.type.is_derived(self.schema.meta_schema.types['ID'])):
+            elif self.xsd_version == '1.0' and self.type.is_key():
                 self.parse_error("'xs:ID' or a type derived from 'xs:ID' cannot has a 'default'")
 
         return 0
@@ -963,6 +961,7 @@ class Xsd11Element(XsdElement):
 
         if inherited:
             dummy = etree_element('_dummy_element', attrib=inherited)
+            dummy.attrib.update(elem.attrib)
 
             for alt in filter(lambda x: x.type is not None, self.alternatives):
                 if alt.token is None or alt.test(elem) or alt.test(dummy):
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index 2684135..f3fa2ce 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -486,7 +486,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
             if 'substitution' in model_element.block \
                     or xsd_element.type.is_blocked(model_element):
                 raise XMLSchemaValidationError(
-                    model_element, "substitution of %r is blocked" % model_element
+                    model_element, elem, "substitution of %r is blocked" % model_element
                 )
 
         alternatives = ()

From 8207284c5a4f9f5a53e89aad6be57f8248686ee7 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Fri, 15 Nov 2019 09:25:04 +0100
Subject: [PATCH 32/34] Remove unused code from models.py module

---
 xmlschema/validators/models.py | 103 ++-------------------------------
 1 file changed, 6 insertions(+), 97 deletions(-)

diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index eb79aec..c26859d 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -443,17 +443,6 @@ class ModelVisitor(MutableSequence):
 
         :param match: provides current element match.
         """
-        def get_choices(self, occurs):
-            max_group_occurs = max(1, occurs // (self.min_occurs or 1))
-            if self.max_occurs is None:
-                return [x for x in range(1, max_group_occurs + 1)]
-            else:
-                delta_occurs = self.max_occurs - self.min_occurs + 1
-                if occurs % max_group_occurs > delta_occurs:
-                    return []
-                else:
-                    return [x for x in range(1, max_group_occurs + 1)]
-
         def stop_item(item):
             """
             Stops element or group matching, incrementing current group counter.
@@ -496,12 +485,13 @@ class ModelVisitor(MutableSequence):
 
             if item is self.group[-1]:
                 if any(occurs[x] for x in self if x is not item):
-                    group_occurs = 1
+                    self.occurs[self.group] += 1
                 else:
                     group_occurs = max(1, item_occurs // (item.min_occurs or 1))
                     if self.group.is_over(group_occurs):
                         group_occurs = self.group.max_occurs
-                self.occurs[self.group] += max(1, group_occurs)
+                    self.occurs[self.group] += max(1, group_occurs)
+
             return item.is_missing(max(item_occurs, occurs[(item,)]))
 
         element, occurs = self.element, self.occurs
@@ -513,11 +503,10 @@ class ModelVisitor(MutableSequence):
             self.match = True
             if self.group.model == 'all':
                 self.items = (e for e in self.group.iter_elements() if not e.is_over(occurs[e]))
-            elif self.group.model == 'choice':  # or len(self.group) == 1:
-                if not element.is_over(occurs[element]) or element.is_ambiguous():
-                    return
             elif not element.is_over(occurs[element]):
                 return
+            elif self.group.model == 'choice' and element.is_ambiguous():
+                return
 
         obj = None
         try:
@@ -535,7 +524,7 @@ class ModelVisitor(MutableSequence):
                     self.group = obj
                     self.items = self.iter_group()
                     self.match = False
-                    occurs[obj] = 0
+                    occurs[obj] = occurs[(obj,)] = 0
 
                 elif obj is not None:
                     # XsdElement or XsdAnyElement
@@ -694,83 +683,3 @@ class ModelVisitor(MutableSequence):
         for name, values in unordered_content.items():
             for v in values:
                 yield name, v
-
-
-class Occurrence(object):
-    """
-    Class for XSD particles occurrence counting and comparison.
-    """
-    def __init__(self, occurs):
-        self.occurs = occurs
-
-    def add(self, occurs):
-        if self.occurs is None:
-            pass
-        elif occurs is None:
-            self.occurs = None
-        else:
-            self.occurs += occurs
-
-    def sub(self, occurs):
-        if self.occurs is None:
-            pass
-        elif occurs is None:
-            self.occurs = 0
-        else:
-            self.occurs -= occurs
-
-    def mul(self, occurs):
-        if occurs == 0:
-            self.occurs = 0
-        elif not self.occurs:
-            pass
-        elif occurs is None:
-            self.occurs = None
-        else:
-            self.occurs *= occurs
-
-    def max(self, occurs):
-        if self.occurs is None:
-            pass
-        elif occurs is None:
-            self.occurs = occurs
-        else:
-            self.occurs = max(self.occurs, occurs)
-
-    def __eq__(self, occurs):
-        return self.occurs == occurs
-
-    def __ne__(self, occurs):
-        return self.occurs != occurs
-
-    def __ge__(self, occurs):
-        if self.occurs is None:
-            return True
-        elif occurs is None:
-            return False
-        else:
-            return self.occurs >= occurs
-
-    def __gt__(self, occurs):
-        if self.occurs is None:
-            return True
-        elif occurs is None:
-            return False
-        else:
-            return self.occurs > occurs
-
-    def __le__(self, occurs):
-        if occurs is None:
-            return True
-        elif self.occurs is None:
-            return False
-        else:
-            return self.occurs <= occurs
-
-    def __lt__(self, occurs):
-        if occurs is None:
-            return True
-        elif self.occurs is None:
-            return False
-        else:
-            return self.occurs < occurs

From a60532a3ab01d71074964cf3bdd388933783f7ed Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Fri, 15 Nov 2019 17:49:46 +0100
Subject: [PATCH 33/34] Fix sequence model stop criteria

---
 CHANGELOG.rst                  |  8 ++++---
 xmlschema/documents.py         |  2 +-
 xmlschema/tests/test_models.py | 31 +++++++++++++++++++++------
 xmlschema/validators/models.py | 39 +++++++++++++++++++++-------------
 4 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 0af63c5..f884662 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,11 +2,12 @@
 CHANGELOG
 *********
 
-`v1.0.16`_ (2019-10-XX)
+`v1.0.16`_ (2019-11-15)
 =======================
-* Improved XMLResource with zip files interface and lazy
+* Improved XMLResource class for working with compressed files
 * Fix for validation with XSD wildcards and 'lax' process content
-* Fix for issue #1...
+* Fix ambiguous items validation for xs:choice and xs:sequence models
+* Dozens of W3C's failed tests fixed
 
 `v1.0.15`_ (2019-10-13)
 =======================
@@ -271,3 +272,4 @@ v0.9.6 (2017-05-05)
 .. _v1.0.13: https://github.com/brunato/xmlschema/compare/v1.0.11...v1.0.13
 .. _v1.0.14: https://github.com/brunato/xmlschema/compare/v1.0.13...v1.0.14
 .. _v1.0.15: https://github.com/brunato/xmlschema/compare/v1.0.14...v1.0.15
+.. _v1.0.16: https://github.com/brunato/xmlschema/compare/v1.0.15...v1.0.16
diff --git a/xmlschema/documents.py b/xmlschema/documents.py
index 439a8c9..02e6471 100644
--- a/xmlschema/documents.py
+++ b/xmlschema/documents.py
@@ -171,7 +171,7 @@ def from_json(source, schema, path=None, converter=None, json_options=None, **kw
 
     :param source: can be a string or a :meth:`read()` supporting file-like object \
     containing the JSON document.
-    :param schema: an :class:`XMLSchema` instance.
+    :param schema: an :class:`XMLSchema` or an :class:`XMLSchema11` instance.
     :param path: is an optional XPath expression for selecting the element of the schema \
     that matches the data that has to be encoded. For default the first global element of \
     the schema is used.
diff --git a/xmlschema/tests/test_models.py b/xmlschema/tests/test_models.py
index b671cd6..a02b9b7 100644
--- a/xmlschema/tests/test_models.py
+++ b/xmlschema/tests/test_models.py
@@ -551,14 +551,14 @@ class TestModelValidation(XsdValidatorTestCase):
                 <xs:element name="root">
                     <xs:complexType>
                         <xs:sequence minOccurs="2" maxOccurs="unbounded">
-                            <xs:element name="a" maxOccurs="unbounded"/>
+                            <xs:element name="ax" maxOccurs="unbounded"/>
                         </xs:sequence>
                     </xs:complexType>
                 </xs:element>
             </xs:schema>
             """)
 
-        self.assertIsNone(schema.validate('<root><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><ax/><ax/></root>'))
 
         schema = self.schema_class(
             """<?xml version="1.0" encoding="UTF-8"?>
@@ -577,6 +577,8 @@ class TestModelValidation(XsdValidatorTestCase):
         self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
         self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/><a/></root>'))
 
+    def test_sequence_model_with_nested_choice_model(self):
+
         schema = self.schema_class(
             """<?xml version="1.0" encoding="UTF-8"?>
             <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
@@ -589,7 +591,7 @@ class TestModelValidation(XsdValidatorTestCase):
                 </xs:element>
                 <xs:group name="group1">
                     <xs:choice>
-                        <xs:element name="ax" maxOccurs="unbounded"/>
+                        <xs:element name="a" maxOccurs="unbounded"/>
                         <xs:element name="b"/>
                         <xs:element name="c"/>
                     </xs:choice>
@@ -597,9 +599,26 @@ class TestModelValidation(XsdValidatorTestCase):
             </xs:schema>
             """)
 
-        self.assertIsNone(schema.validate('<root><ax/><ax/></root>'))
-        # self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
-        # self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/></root>'))
+        self.assertIsNone(schema.validate('<root><a/><a/><a/><a/><a/><a/></root>'))
+
+    def test_sequence_model_with_optional_elements(self):
+        schema = self.schema_class(
+            """<?xml version="1.0" encoding="UTF-8"?>
+            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+                <xs:element name="root">
+                    <xs:complexType>
+                        <xs:sequence minOccurs="2" maxOccurs="2">
+                            <xs:element name="a" minOccurs="1" maxOccurs="2" />
+                            <xs:element name="b" minOccurs="0" />
+                        </xs:sequence>
+                    </xs:complexType>
+               </xs:element>
+            </xs:schema>
+            """)
+
+        self.assertIsNone(schema.validate('<root><a/><a/><b/></root>'))
 
     def test_choice_model_with_extended_occurs(self):
         schema = self.schema_class(
diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py
index c26859d..96cfaaf 100644
--- a/xmlschema/validators/models.py
+++ b/xmlschema/validators/models.py
@@ -453,12 +453,12 @@ class ModelVisitor(MutableSequence):
             if isinstance(item, ModelGroup):
                 self.group, self.items, self.match = self.pop()
 
-            item_occurs = occurs[item]
             if self.group.model == 'choice':
+                item_occurs = occurs[item]
                 if not item_occurs:
                     return False
-
                 item_max_occurs = occurs[(item,)] or item_occurs
+
                 min_group_occurs = max(1, item_occurs // (item.max_occurs or item_occurs))
                 max_group_occurs = max(1, item_max_occurs // (item.min_occurs or 1))
 
@@ -468,31 +468,40 @@ class ModelVisitor(MutableSequence):
 
                 self.items = self.iter_group()
                 self.match = False
-                return item.is_missing(max(item_occurs, occurs[(item,)]))
+                return item.is_missing(item_max_occurs)
 
             elif self.group.model == 'all':
                 return False
-            elif item_occurs:
-                self.match = True
             elif self.match:
                 pass
+            elif occurs[item]:
+                self.match = True
             elif item.is_emptiable():
                 return False
-            elif self.group.min_occurs <= occurs[self.group] or self:
+            elif self.group.min_occurs <= max(occurs[self.group], occurs[(self.group,)]) or self:
                 return stop_item(self.group)
             else:
                 return True
 
             if item is self.group[-1]:
-                if any(occurs[x] for x in self if x is not item):
-                    self.occurs[self.group] += 1
-                else:
-                    group_occurs = max(1, item_occurs // (item.min_occurs or 1))
-                    if self.group.is_over(group_occurs):
-                        group_occurs = self.group.max_occurs
-                    self.occurs[self.group] += max(1, group_occurs)
+                for k, item2 in enumerate(self.group, start=1):
+                    item_occurs = occurs[item2]
+                    if not item_occurs:
+                        continue
 
-            return item.is_missing(max(item_occurs, occurs[(item,)]))
+                    item_max_occurs = occurs[(item2,)] or item_occurs
+                    if item_max_occurs == 1 or any(not x.is_emptiable() for x in self.group[k:]):
+                        self.occurs[self.group] += 1
+                        break
+
+                    min_group_occurs = max(1, item_occurs // (item2.max_occurs or item_occurs))
+                    max_group_occurs = max(1, item_max_occurs // (item2.min_occurs or 1))
+
+                    occurs[self.group] += min_group_occurs
+                    occurs[(self.group,)] += max_group_occurs
+                    break
+
+            return item.is_missing(max(occurs[item], occurs[(item,)]))
 
         element, occurs = self.element, self.occurs
         if element is None:
@@ -514,7 +523,7 @@ class ModelVisitor(MutableSequence):
                 yield element, occurs[element], [element]
 
             while True:
-                while self.group.is_over(occurs[self.group]):
+                while self.group.is_over(max(occurs[self.group], occurs[(self.group,)])):
                     stop_item(self.group)
 
                 obj = next(self.items, None)

From 92de835afa9bbd354ead7ad1e4b06051eef085c6 Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Mon, 18 Nov 2019 06:40:16 +0100
Subject: [PATCH 34/34] Fix defaultOpenContent and defaultAttributes parsing

---
 xmlschema/validators/complex_types.py | 118 ++++++++++++++++----------
 xmlschema/validators/elements.py      |   4 +
 xmlschema/validators/groups.py        |   5 +-
 xmlschema/validators/identities.py    |   2 +-
 xmlschema/validators/simple_types.py  |   7 +-
 xmlschema/validators/wildcards.py     |   4 +-
 xmlschema/validators/xsdbase.py       |   4 +-
 7 files changed, 92 insertions(+), 52 deletions(-)

diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py
index 4010445..edfe0b1 100644
--- a/xmlschema/validators/complex_types.py
+++ b/xmlschema/validators/complex_types.py
@@ -13,8 +13,9 @@ from __future__ import unicode_literals
 from ..exceptions import XMLSchemaValueError
 from ..qnames import XSD_ANNOTATION, XSD_GROUP, XSD_ATTRIBUTE_GROUP, XSD_SEQUENCE, \
     XSD_ALL, XSD_CHOICE, XSD_ANY_ATTRIBUTE, XSD_ATTRIBUTE, XSD_COMPLEX_CONTENT, \
-    XSD_RESTRICTION, XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_SIMPLE_CONTENT, \
-    XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, get_qname, local_name
+    XSD_RESTRICTION, XSD_COMPLEX_TYPE, XSD_EXTENSION, XSD_ANY_TYPE, XSD_OVERRIDE, \
+    XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, \
+    get_qname, local_name
 from ..helpers import get_xsd_derivation_attribute
 
 from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError
@@ -52,6 +53,8 @@ class XsdComplexType(XsdType, ValidationMixin):
     mixed = False
     assertions = ()
     open_content = None
+    content_type = None
+    default_open_content = None
     _block = None
 
     _ADMITTED_TAGS = {XSD_COMPLEX_TYPE, XSD_RESTRICTION}
@@ -138,6 +141,10 @@ class XsdComplexType(XsdType, ValidationMixin):
 
         elif content_elem.tag in {XSD_GROUP, XSD_SEQUENCE, XSD_ALL, XSD_CHOICE}:
             self.content_type = self.schema.BUILDERS.group_class(content_elem, self.schema, self)
+            default_open_content = self.default_open_content
+            if default_open_content and \
+                    (self.mixed or self.content_type or default_open_content.applies_to_empty):
+                self.open_content = default_open_content
             self._parse_content_tail(elem)
 
         elif content_elem.tag == XSD_SIMPLE_CONTENT:
@@ -179,6 +186,7 @@ class XsdComplexType(XsdType, ValidationMixin):
                 self.base_type = base_type
             elif self.redefine:
                 self.base_type = self.redefine
+                self.open_content = None
 
             if derivation_elem.tag == XSD_RESTRICTION:
                 self._parse_complex_content_restriction(derivation_elem, base_type)
@@ -344,9 +352,11 @@ class XsdComplexType(XsdType, ValidationMixin):
                 "derived an empty content from base type that has not empty content.", elem
             )
 
-        if not self.open_content and self.schema.default_open_content:
-            if content_type or self.schema.default_open_content.applies_to_empty:
-                self.open_content = self.schema.default_open_content
+        if not self.open_content:
+            default_open_content = self.default_open_content
+            if default_open_content and \
+                    (self.mixed or content_type or default_open_content.applies_to_empty):
+                self.open_content = default_open_content
 
         if self.open_content and content_type and \
                 not self.open_content.is_restriction(base_type.open_content):
@@ -453,6 +463,8 @@ class XsdComplexType(XsdType, ValidationMixin):
     def is_empty(self):
         if self.name == XSD_ANY_TYPE:
             return False
+        elif self.open_content and self.open_content.mode != 'none':  # open content with a mode other than 'none'
+            return False  # such a type is never reported as empty
         return self.content_type.is_empty()
 
     def is_emptiable(self):
@@ -571,6 +583,10 @@ class XsdComplexType(XsdType, ValidationMixin):
         :return: yields a 3-tuple (simple content, complex content, attributes) containing \
         the decoded parts, eventually preceded by a sequence of validation or decoding errors.
         """
+        if self.is_empty() and elem.text:
+            reason = "character data between child elements not allowed because the type's content is empty"
+            yield self.validation_error(validation, reason, elem, **kwargs)
+
         # XSD 1.1 assertions
         for assertion in self.assertions:
             for error in assertion(elem, **kwargs):
@@ -665,6 +681,32 @@ class Xsd11ComplexType(XsdComplexType):
 
     _CONTENT_TAIL_TAGS = {XSD_ATTRIBUTE_GROUP, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE, XSD_ASSERT}
 
+    @property
+    def default_attributes(self):
+        """Default attributes for this type: own schema's, or the included schema's under xs:override."""
+        if self.redefine is None:
+            for child in self.schema.root:
+                if child.tag == XSD_OVERRIDE and self.elem in child:
+                    schema = self.schema.includes[child.attrib['schemaLocation']]
+                    if schema.override is self.schema:
+                        # The type is a child of this override: use the included schema's defaults.
+                        return schema.default_attributes
+        # Redefinitions and every other declaration fall back to their own schema's defaults.
+        return self.schema.default_attributes
+
+    @property
+    def default_open_content(self):
+        """Default open content for this type: own schema's, or the included schema's under xs:override."""
+        if self.parent is None:
+            for child in self.schema.root:
+                if child.tag == XSD_OVERRIDE and self.elem in child:
+                    schema = self.schema.includes[child.attrib['schemaLocation']]
+                    if schema.override is self.schema:
+                        # The type is a child of this override: use the included schema's default.
+                        return schema.default_open_content
+        # Types with a parent component and every other declaration use their own schema's default.
+        return self.schema.default_open_content
+
     def _parse(self):
         super(Xsd11ComplexType, self)._parse()
 
@@ -677,19 +719,12 @@ class Xsd11ComplexType(XsdComplexType):
 
         # Add open content to complex content type
         if isinstance(self.content_type, XsdGroup):
-            open_content = self.open_content
-            if open_content is not None:
-                pass
-            elif self.schema.default_open_content is not None:
-                if self.content_type or self.schema.default_open_content.applies_to_empty:
-                    open_content = self.schema.default_open_content
-
-            if open_content is None:
-                pass
-            elif open_content.mode == 'interleave':
-                self.content_type.interleave = self.content_type.suffix = open_content.any_element
-            elif open_content.mode == 'suffix':
-                self.content_type.suffix = open_content.any_element
+            if self.open_content is None:
+                assert self.content_type.interleave is None and self.content_type.suffix is None
+            elif self.open_content.mode == 'interleave':
+                self.content_type.interleave = self.content_type.suffix = self.open_content.any_element
+            elif self.open_content.mode == 'suffix':
+                self.content_type.suffix = self.open_content.any_element
 
         # Add inheritable attributes
         if hasattr(self.base_type, 'attributes'):
@@ -707,19 +742,12 @@ class Xsd11ComplexType(XsdComplexType):
             self.default_attributes_apply = True
 
         # Add default attributes
-        if self.redefine is None:
-            default_attributes = self.schema.default_attributes
-        else:
-            default_attributes = self.redefine.schema.default_attributes
-
-        if default_attributes is None:
-            pass
-        elif self.default_attributes_apply and not self.is_override():
-            if self.redefine is None and any(k in self.attributes for k in default_attributes):
-                self.parse_error("at least a default attribute is already declared in the complex type")
-            self.attributes.update(
-                (k, v) for k, v in default_attributes.items() if k not in self.attributes
-            )
+        if self.default_attributes_apply:
+            default_attributes = self.default_attributes
+            if default_attributes is not None:
+                if self.redefine is None and any(k in self.attributes for k in default_attributes):
+                    self.parse_error("at least a default attribute is already declared in the complex type")
+                self.attributes.update((k, v) for k, v in default_attributes.items())
 
     def _parse_complex_content_extension(self, elem, base_type):
         # Complex content extension with simple base is forbidden XSD 1.1.
@@ -744,19 +772,6 @@ class Xsd11ComplexType(XsdComplexType):
         else:
             group_elem = None
 
-        if not self.open_content:
-            if self.schema.default_open_content:
-                self.open_content = self.schema.default_open_content
-            elif getattr(base_type, 'open_content', None):
-                self.open_content = base_type.open_content
-
-        try:
-            if self.open_content and not base_type.open_content.is_restriction(self.open_content):
-                msg = "{!r} is not an extension of the base type {!r}"
-                self.parse_error(msg.format(self.open_content, base_type.open_content))
-        except AttributeError:
-            pass
-
         if not base_type.content_type:
             if not base_type.mixed:
                 # Empty element-only model extension: don't create a nested sequence group.
@@ -831,6 +846,21 @@ class Xsd11ComplexType(XsdComplexType):
         else:
             self.content_type = self.schema.create_empty_content_group(self)
 
+        if not self.open_content:
+            default_open_content = self.default_open_content
+            if default_open_content and \
+                    (self.mixed or self.content_type or default_open_content.applies_to_empty):
+                self.open_content = default_open_content
+            elif base_type.open_content:
+                self.open_content = base_type.open_content
+
+        if base_type.open_content and self.open_content is not base_type.open_content:
+            if self.open_content.mode == 'none':
+                self.open_content = base_type.open_content
+            elif not base_type.open_content.is_restriction(self.open_content):
+                msg = "{!r} is not an extension of the base type {!r}"
+                self.parse_error(msg.format(self.open_content, base_type.open_content))
+
         self._parse_content_tail(elem, derivation='extension', base_attributes=base_type.attributes)
 
     def _parse_content_tail(self, elem, **kwargs):
diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
index 4b2d3c7..636c537 100644
--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@@ -531,6 +531,10 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin)
                 yield converter.element_decode(element_data, self, level)
                 return
 
+        if xsd_type.is_empty() and elem.text:
+            reason = "character data is not allowed because the type's content is empty"
+            yield self.validation_error(validation, reason, elem, **kwargs)
+
         if not xsd_type.has_simple_content():
             for assertion in xsd_type.assertions:
                 for error in assertion(elem, **kwargs):
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index f3fa2ce..23ccbad 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -526,7 +526,8 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
 
             if model_element is not xsd_element and model_element.block:
                 for derivation in model_element.block.split():
-                    if xsd_type.is_derived(model_element.type, derivation):
+                    if xsd_type is not model_element.type and \
+                            xsd_type.is_derived(model_element.type, derivation):
                         reason = "usage of %r with type %s is blocked by head element"
                         raise XMLSchemaValidationError(self, reason % (xsd_element, derivation))
 
@@ -578,7 +579,7 @@ class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
                 if len(self) == 1 and isinstance(self[0], XsdAnyElement):
                     pass  # [XsdAnyElement()] equals to an empty complexType declaration
                 else:
-                    reason = "character data between child elements not allowed!"
+                    reason = "character data between child elements not allowed"
                     yield self.validation_error(validation, reason, elem, **kwargs)
                     cdata_index = 0  # Do not decode CDATA
 
diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py
index 1e51d95..65b4fd3 100644
--- a/xmlschema/validators/identities.py
+++ b/xmlschema/validators/identities.py
@@ -201,7 +201,7 @@ class XsdIdentity(XsdComponent):
                     yield XMLSchemaValidationError(self, e, "{!r} is not an element".format(xsd_element))
                 xsd_fields = self.get_fields(xsd_element)
 
-            if all(fld is None for fld in xsd_fields):
+            if not xsd_fields or all(fld is None for fld in xsd_fields):
                 continue
 
             try:
diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py
index 182015a..e6e5a3d 100644
--- a/xmlschema/validators/simple_types.py
+++ b/xmlschema/validators/simple_types.py
@@ -334,6 +334,10 @@ class XsdSimpleType(XsdType, ValidationMixin):
         else:
             return self.base_type.is_derived(other, derivation)
 
+    def is_dynamic_consistent(self, other):  # NOTE: `and` binds tighter than `or` below
+        return other is self.any_type or other is self.any_simple_type or self.is_derived(other) or \
+            hasattr(other, 'member_types') and any(self.is_derived(mt) for mt in other.member_types)
+
     def normalize(self, text):
         """
         Normalize and restrict value-space with pre-lexical and lexical facets.
@@ -867,7 +871,8 @@ class XsdUnion(XsdSimpleType):
         return all(mt.is_list() for mt in self.member_types)
 
     def is_dynamic_consistent(self, other):
-        return other.is_derived(self) or hasattr(other, 'member_types') and \
+        return other is self.any_type or other is self.any_simple_type or \
+            other.is_derived(self) or hasattr(other, 'member_types') and \
             any(mt1.is_derived(mt2) for mt1 in other.member_types for mt2 in self.member_types)
 
     def iter_components(self, xsd_classes=None):
diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py
index fe2e448..82c2071 100644
--- a/xmlschema/validators/wildcards.py
+++ b/xmlschema/validators/wildcards.py
@@ -782,8 +782,8 @@ class XsdOpenContent(XsdComponent):
         return True
 
     def is_restriction(self, other):
-        if self.mode == 'none' or other is None or other.mode == 'none':
-            return True
+        if other is None or other.mode == 'none':
+            return self.mode == 'none'
         elif self.mode == 'interleave' and other.mode == 'suffix':
             return False
         else:
diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py
index aab0b89..a1af296 100644
--- a/xmlschema/validators/xsdbase.py
+++ b/xmlschema/validators/xsdbase.py
@@ -701,8 +701,8 @@ class XsdType(XsdComponent):
         return any(self.is_derived(xsd_type, derivation) for derivation in block)
 
     def is_dynamic_consistent(self, other):
-        return self.is_derived(other) or hasattr(other, 'member_types') and \
-            any(self.is_derived(mt) for mt in other.member_types)
+        return other is self.any_type or self.is_derived(other) or \
+            hasattr(other, 'member_types') and any(self.is_derived(mt) for mt in other.member_types)
 
     def is_key(self):
         return self.name == XSD_ID or self.is_derived(self.maps.types[XSD_ID])