issue #158: TRY to speed up scope & $ref URL handling by keeping

fragments separated from URL (and avoid redundant frag/defrag).
Conflicts:
	jsonschema/tests/test_benchmarks.py

issue #158: Use try-finally to ensure the resolver's scopes_stack is empty
when iteration breaks (no detectable performance penalty).

* Replace non-python-2.6 DefragResult with named-tuple.
* Add test-case checking scopes_stack empty.
Conflicts:
	jsonschema/tests/test_validators.py
	jsonschema/validators.py
This commit is contained in:
Kostis Anagnostopoulos @ STUW025 2014-09-24 03:20:07 +02:00 committed by Daniel Nephin
parent a38eac9048
commit 2fda1556ef
2 changed files with 45 additions and 23 deletions

View File

@ -1,6 +1,9 @@
from __future__ import unicode_literals
import sys
from collections import namedtuple
import operator
import sys
try:
from collections import MutableMapping, Sequence # noqa
@ -40,6 +43,9 @@ def urlsplit(url):
return SplitResult(scheme, netloc, path, query, fragment)
DefragResult = namedtuple('DefragResult', 'url fragment')
def urldefrag(url):
if "#" in url:
s, n, p, q, frag = urlsplit(url)
@ -47,7 +53,7 @@ def urldefrag(url):
else:
defrag = url
frag = ''
return defrag, frag
return DefragResult(defrag, frag)
# flake8: noqa

View File

@ -11,7 +11,8 @@ except ImportError:
from jsonschema import _utils, _validators
from jsonschema.compat import (
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen, DefragResult,
str_types, int_types, iteritems,
)
from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa
@ -79,7 +80,10 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq
if _schema is None:
_schema = self.schema
with self.resolver.in_scope(_schema.get(u"id", u"")):
scope = _schema.get(u"id")
if scope:
self.resolver.push_scope(scope)
try:
ref = _schema.get(u"$ref")
if ref is not None:
validators = [(u"$ref", ref)]
@ -103,6 +107,9 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq
if k != u"$ref":
error.schema_path.appendleft(k)
yield error
finally:
if scope:
self.resolver.pop_scope()
def descend(self, instance, schema, path=None, schema_path=None):
for error in self.iter_errors(instance, schema):
@ -222,7 +229,7 @@ class RefResolver(object):
:argument str base_uri: URI of the referring document
:argument referrer: the actual referring document
:argument dict store: a mapping from URIs to documents to cache
:argument dict store: a mapping from URIs (without fragments!) to documents to cache
:argument bool cache_remote: whether remote refs should be cached after
first resolution
:argument dict handlers: a mapping from URI schemes to functions that
@ -233,6 +240,7 @@ class RefResolver(object):
def __init__(
self, base_uri, referrer, store=(), cache_remote=True, handlers=(),
):
base_uri = urldefrag(base_uri)
self.base_uri = base_uri
self.resolution_scope = base_uri
# This attribute is not used, it is for backwards compatibility
@ -240,12 +248,13 @@ class RefResolver(object):
self.cache_remote = cache_remote
self.handlers = dict(handlers)
self.scopes_stack = []
self.store = _utils.URIDict(
(id, validator.META_SCHEMA)
for id, validator in iteritems(meta_schemas)
)
self.store.update(store)
self.store[base_uri] = referrer
self.store[base_uri.url] = referrer
@classmethod
def from_schema(cls, schema, *args, **kwargs):
@ -259,14 +268,19 @@ class RefResolver(object):
return cls(schema.get(u"id", u""), schema, *args, **kwargs)
@contextlib.contextmanager
def in_scope(self, scope):
def push_scope(self, scope, is_defragged=False):
old_scope = self.resolution_scope
self.resolution_scope = urljoin(old_scope, scope)
try:
yield
finally:
self.resolution_scope = old_scope
self.scopes_stack.append(old_scope)
if not is_defragged:
scope = urldefrag(scope)
self.resolution_scope = DefragResult(
urljoin(old_scope.url, scope.url, allow_fragments=False)
if scope.url else old_scope.url,
scope.fragment
)
def pop_scope(self):
self.resolution_scope = self.scopes_stack.pop()
@contextlib.contextmanager
def resolving(self, ref):
@ -278,24 +292,26 @@ class RefResolver(object):
"""
full_uri = urljoin(self.resolution_scope, ref)
uri, fragment = urldefrag(full_uri)
if not uri:
uri = self.base_uri
ref = urldefrag(ref)
if uri in self.store:
document = self.store[uri]
else:
url = urljoin(self.resolution_scope.url, ref.url, allow_fragments=False) \
if ref.url else self.resolution_scope.url
try:
document = self.store[url]
except KeyError:
try:
document = self.resolve_remote(uri)
document = self.resolve_remote(url)
except Exception as exc:
raise RefResolutionError(exc)
uri = DefragResult(url, ref.fragment)
old_base_uri, self.base_uri = self.base_uri, uri
self.push_scope(uri, is_defragged=True)
try:
with self.in_scope(uri):
yield self.resolve_fragment(document, fragment)
yield self.resolve_fragment(document, ref.fragment)
finally:
self.pop_scope()
self.base_uri = old_base_uri
def resolve_fragment(self, document, fragment):