Merge remote-tracking branch 'dnephin/perf_cache_resolving'
* dnephin/perf_cache_resolving: Use lru_cache. Remove DefragResult. Remove context manager from ref() validation. Perf improvements by using a cache. Add benchmark script. Fix test failures. Issue #158: TRY to speed-up scope & $ref url-handling by keeping fragments separated from URL (and avoid redundant frag/defrag). Conflicts: jsonschema/tests/test_benchmarks.py
This commit is contained in:
commit
a959fb1688
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Benchmark the performance of jsonschema.
|
||||
|
||||
Example benchmark:
|
||||
|
||||
wget http://swagger.io/v2/schema.json
|
||||
wget http://petstore.swagger.io/v2/swagger.json
|
||||
python bench.py -r 5 schema.json swagger.json
|
||||
|
||||
"""
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import cProfile
|
||||
import json
|
||||
import time
|
||||
|
||||
import jsonschema
|
||||
|
||||
|
||||
def parse_args():
    """
    Parse the benchmark's command line arguments.

    Returns the ``argparse`` namespace with the attributes ``schema`` and
    ``document`` (positional file paths), ``repeat`` (an int, 1 when not
    given) and ``profile`` (a file path, or ``None`` when not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('schema', help="path to a schema used to benchmark")
    parser.add_argument('document', help="document to validate with schema")
    # Without a default, ``repeat`` is ``None`` when ``-r`` is omitted and
    # the timing loop fails on ``range(None)``.
    parser.add_argument(
        '-r', '--repeat', type=int, default=1,
        help="number of iterations (default: 1)")
    parser.add_argument('--profile',
                        help="Enable profiling, write profile to this filepath")
    return parser.parse_args()
|
||||
|
||||
|
||||
def run(filename, schema, document):
    """
    Validate ``document`` against ``schema`` once.

    A ``jsonschema.RefResolver`` is built with ``file://<filename>`` as its
    base URI. When the schema declares an ``id``, the schema is also put
    in the resolver's store under that id.

    :argument str filename: path of the schema file, used for the base URI
    :argument dict schema: the loaded schema
    :argument document: the loaded instance to validate
    """
    store = {}
    # ``id`` is optional in a schema; indexing it directly raised KeyError
    # for schemas that do not declare one.
    schema_id = schema.get('id')
    if schema_id is not None:
        store[schema_id] = schema

    resolver = jsonschema.RefResolver(
        'file://{0}'.format(filename),
        schema,
        store=store)
    jsonschema.validate(document, schema, resolver=resolver)
|
||||
|
||||
|
||||
def format_time(time_):
    """
    Format a duration given in seconds as milliseconds.

    The value is multiplied by 1000 and rendered with three decimal
    places followed by ``ms``, e.g. ``0.001`` becomes ``"1.000ms"``.
    """
    return "%.3fms" % (time_ * 1000)
|
||||
|
||||
|
||||
def run_timeit(schema_filename, document_filename, repeat, profile):
    """
    Time ``repeat`` validations of a document against a schema.

    Both files are loaded as JSON once, then ``run`` is called ``repeat``
    times and each call is timed with ``time.time()``. The sorted timings
    and their mean are printed.

    :argument str schema_filename: path to the JSON schema
    :argument str document_filename: path to the JSON document
    :argument int repeat: number of iterations, must be at least 1
    :argument profile: if truthy, a file path; the whole timing loop is
        profiled with ``cProfile`` and the stats are dumped to that path
    :raises ValueError: if ``repeat`` is ``None`` or smaller than 1
    """
    # Fail early with a clear message instead of a TypeError from
    # ``range(None)`` or a ZeroDivisionError when computing the mean.
    if repeat is None or repeat < 1:
        raise ValueError(
            "repeat must be a positive integer, got {0!r}".format(repeat))

    with open(schema_filename) as schema_file:
        schema = json.load(schema_file)

    with open(document_filename) as fh:
        document = json.load(fh)

    profiler = cProfile.Profile() if profile else None
    times = []

    if profiler is not None:
        profiler.enable()
    try:
        for _ in range(repeat):
            start_time = time.time()
            run(schema_filename, schema, document)
            times.append(time.time() - start_time)
    finally:
        # Always stop the profiler, even if validation raised, and keep
        # whatever was collected so far.
        if profiler is not None:
            profiler.disable()
            profiler.dump_stats(profile)

    print(", ".join(map(format_time, sorted(times))))
    print("Mean: {0}".format(format_time(sum(times) / len(times))))
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the benchmark with those options."""
    args = parse_args()
    run_timeit(args.schema, args.document, args.repeat, args.profile)


if __name__ == "__main__":
    main()
|
|
@ -19,8 +19,6 @@ from jsonschema.validators import (
|
|||
Draft3Validator, Draft4Validator, RefResolver, validate
|
||||
)
|
||||
|
||||
|
||||
__version__ = "2.5.0-dev"
|
||||
|
||||
from jsonschema.version import __version__
|
||||
|
||||
# flake8: noqa
|
||||
|
|
|
@ -190,9 +190,14 @@ def enum(validator, enums, instance, schema):
|
|||
|
||||
|
||||
def ref(validator, ref, instance, schema):
|
||||
with validator.resolver.resolving(ref) as resolved:
|
||||
scope, resolved = validator.resolver.resolve(ref)
|
||||
validator.resolver.push_scope(scope)
|
||||
|
||||
try:
|
||||
for error in validator.descend(instance, resolved):
|
||||
yield error
|
||||
finally:
|
||||
validator.resolver.pop_scope()
|
||||
|
||||
|
||||
def type_draft3(validator, types, instance, schema):
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
from __future__ import unicode_literals
|
||||
import sys
|
||||
|
||||
import operator
|
||||
import sys
|
||||
|
||||
|
||||
try:
|
||||
from collections import MutableMapping, Sequence # noqa
|
||||
|
@ -11,6 +13,7 @@ PY3 = sys.version_info[0] >= 3
|
|||
|
||||
if PY3:
|
||||
zip = zip
|
||||
from functools import lru_cache
|
||||
from io import StringIO
|
||||
from urllib.parse import (
|
||||
unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit
|
||||
|
@ -21,6 +24,7 @@ if PY3:
|
|||
iteritems = operator.methodcaller("items")
|
||||
else:
|
||||
from itertools import izip as zip # noqa
|
||||
from repoze.lru import lru_cache
|
||||
from StringIO import StringIO
|
||||
from urlparse import (
|
||||
urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from collections import deque
|
||||
from contextlib import contextmanager
|
||||
import json
|
||||
|
||||
from jsonschema import FormatChecker, ValidationError
|
||||
|
@ -633,12 +632,8 @@ class ValidatorTestMixin(object):
|
|||
resolver = RefResolver("", {})
|
||||
schema = {"$ref" : mock.Mock()}
|
||||
|
||||
@contextmanager
|
||||
def resolving():
|
||||
yield {"type": "integer"}
|
||||
|
||||
with mock.patch.object(resolver, "resolving") as resolve:
|
||||
resolve.return_value = resolving()
|
||||
with mock.patch.object(resolver, "resolve") as resolve:
|
||||
resolve.return_value = "url", {"type": "integer"}
|
||||
with self.assertRaises(ValidationError):
|
||||
self.validator_class(schema, resolver=resolver).validate(None)
|
||||
|
||||
|
@ -775,11 +770,11 @@ class TestRefResolver(unittest.TestCase):
|
|||
self.assertEqual(resolved, self.referrer["properties"]["foo"])
|
||||
|
||||
def test_it_resolves_local_refs_with_id(self):
|
||||
schema = {"id": "foo://bar/schema#", "a": {"foo": "bar"}}
|
||||
schema = {"id": "http://bar/schema#", "a": {"foo": "bar"}}
|
||||
resolver = RefResolver.from_schema(schema)
|
||||
with resolver.resolving("#/a") as resolved:
|
||||
self.assertEqual(resolved, schema["a"])
|
||||
with resolver.resolving("foo://bar/schema#/a") as resolved:
|
||||
with resolver.resolving("http://bar/schema#/a") as resolved:
|
||||
self.assertEqual(resolved, schema["a"])
|
||||
|
||||
def test_it_retrieves_stored_refs(self):
|
||||
|
@ -815,7 +810,7 @@ class TestRefResolver(unittest.TestCase):
|
|||
def test_it_can_construct_a_base_uri_from_a_schema(self):
|
||||
schema = {"id" : "foo"}
|
||||
resolver = RefResolver.from_schema(schema)
|
||||
self.assertEqual(resolver.base_uri, "foo")
|
||||
self.assertEqual(resolver.resolution_scope, "foo")
|
||||
with resolver.resolving("") as resolved:
|
||||
self.assertEqual(resolved, schema)
|
||||
with resolver.resolving("#") as resolved:
|
||||
|
@ -828,7 +823,7 @@ class TestRefResolver(unittest.TestCase):
|
|||
def test_it_can_construct_a_base_uri_from_a_schema_without_id(self):
|
||||
schema = {}
|
||||
resolver = RefResolver.from_schema(schema)
|
||||
self.assertEqual(resolver.base_uri, "")
|
||||
self.assertEqual(resolver.resolution_scope, "")
|
||||
with resolver.resolving("") as resolved:
|
||||
self.assertEqual(resolved, schema)
|
||||
with resolver.resolving("#") as resolved:
|
||||
|
@ -863,9 +858,7 @@ class TestRefResolver(unittest.TestCase):
|
|||
)
|
||||
with resolver.resolving(ref):
|
||||
pass
|
||||
with resolver.resolving(ref):
|
||||
pass
|
||||
self.assertEqual(foo_handler.call_count, 2)
|
||||
self.assertEqual(foo_handler.call_count, 1)
|
||||
|
||||
def test_if_you_give_it_junk_you_get_a_resolution_error(self):
|
||||
ref = "foo://bar"
|
||||
|
@ -876,6 +869,13 @@ class TestRefResolver(unittest.TestCase):
|
|||
pass
|
||||
self.assertEqual(str(err.exception), "Oh no! What's this?")
|
||||
|
||||
def test_helpful_error_message_on_failed_pop_scope(self):
|
||||
resolver = RefResolver("", {})
|
||||
resolver.pop_scope()
|
||||
with self.assertRaises(RefResolutionError) as exc:
|
||||
resolver.pop_scope()
|
||||
self.assertIn("Failed to pop the scope", str(exc.exception))
|
||||
|
||||
|
||||
def sorted_errors(errors):
|
||||
def key(error):
|
||||
|
|
|
@ -12,7 +12,7 @@ except ImportError:
|
|||
from jsonschema import _utils, _validators
|
||||
from jsonschema.compat import (
|
||||
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
|
||||
str_types, int_types, iteritems,
|
||||
str_types, int_types, iteritems, lru_cache,
|
||||
)
|
||||
from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa
|
||||
from jsonschema.exceptions import RefResolutionError, SchemaError, UnknownType
|
||||
|
@ -79,7 +79,10 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq
|
|||
if _schema is None:
|
||||
_schema = self.schema
|
||||
|
||||
with self.resolver.in_scope(_schema.get(u"id", u"")):
|
||||
scope = _schema.get(u"id")
|
||||
if scope:
|
||||
self.resolver.push_scope(scope)
|
||||
try:
|
||||
ref = _schema.get(u"$ref")
|
||||
if ref is not None:
|
||||
validators = [(u"$ref", ref)]
|
||||
|
@ -103,6 +106,9 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq
|
|||
if k != u"$ref":
|
||||
error.schema_path.appendleft(k)
|
||||
yield error
|
||||
finally:
|
||||
if scope:
|
||||
self.resolver.pop_scope()
|
||||
|
||||
def descend(self, instance, schema, path=None, schema_path=None):
|
||||
for error in self.iter_errors(instance, schema):
|
||||
|
@ -227,19 +233,22 @@ class RefResolver(object):
|
|||
first resolution
|
||||
:argument dict handlers: a mapping from URI schemes to functions that
|
||||
should be used to retrieve them
|
||||
|
||||
:argument callable cache_func: a function decorator used to cache
|
||||
expensive calls. Should support the `functools.lru_cache` interface.
|
||||
:argument int cache_maxsize: number of items to store in the cache. Set
|
||||
this to 0 to disable caching. Defaults to 1000.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, base_uri, referrer, store=(), cache_remote=True, handlers=(),
|
||||
cache_func=lru_cache, cache_maxsize=1000,
|
||||
):
|
||||
self.base_uri = base_uri
|
||||
self.resolution_scope = base_uri
|
||||
# This attribute is not used, it is for backwards compatibility
|
||||
self.referrer = referrer
|
||||
self.cache_remote = cache_remote
|
||||
self.handlers = dict(handlers)
|
||||
|
||||
self._scopes_stack = [base_uri]
|
||||
self.store = _utils.URIDict(
|
||||
(id, validator.META_SCHEMA)
|
||||
for id, validator in iteritems(meta_schemas)
|
||||
|
@ -247,6 +256,9 @@ class RefResolver(object):
|
|||
self.store.update(store)
|
||||
self.store[base_uri] = referrer
|
||||
|
||||
self._urljoin_cache = cache_func(cache_maxsize)(urljoin)
|
||||
self._resolve_cache = cache_func(cache_maxsize)(self.resolve_from_url)
|
||||
|
||||
@classmethod
|
||||
def from_schema(cls, schema, *args, **kwargs):
|
||||
"""
|
||||
|
@ -259,17 +271,46 @@ class RefResolver(object):
|
|||
|
||||
return cls(schema.get(u"id", u""), schema, *args, **kwargs)
|
||||
|
||||
def push_scope(self, scope):
    """
    Enter a new resolution scope.

    ``scope`` is joined onto the current ``resolution_scope`` (through the
    cached ``urljoin``) and the result is pushed onto the scope stack.
    """
    self._scopes_stack.append(
        self._urljoin_cache(self.resolution_scope, scope))
|
||||
|
||||
def pop_scope(self):
    """
    Leave the current resolution scope by popping the scope stack.

    :raises RefResolutionError: if the stack is already empty, i.e.
        ``pop_scope()`` was called more often than ``push_scope()``
    """
    try:
        self._scopes_stack.pop()
    except IndexError:
        raise RefResolutionError(
            "Failed to pop the scope from an empty stack. "
            "`pop_scope()` should only be called once for every "
            "`push_scope()`")
|
||||
|
||||
@property
def resolution_scope(self):
    """The scope currently on top of the scope stack."""
    return self._scopes_stack[-1]
|
||||
|
||||
|
||||
# Deprecated, this function is no longer used, but is preserved for
|
||||
# backwards compatibility
|
||||
@contextlib.contextmanager
|
||||
def in_scope(self, scope):
|
||||
old_scope = self.resolution_scope
|
||||
self.resolution_scope = urljoin(old_scope, scope)
|
||||
self.push_scope(scope)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.resolution_scope = old_scope
|
||||
self.pop_scope()
|
||||
|
||||
# Deprecated, this function is no longer used, but is preserved for
# backwards compatibility
@contextlib.contextmanager
def resolving(self, ref):
    """
    Context manager which resolves ``ref`` and enters its scope.

    ``resolve(ref)`` supplies the URL and the resolved document. The URL
    is pushed as a scope for the duration of the ``with`` block and the
    resolved document is yielded. The scope is popped again on exit, also
    when the block raises.
    """
    url, resolved = self.resolve(ref)
    self.push_scope(url)
    try:
        yield resolved
    finally:
        self.pop_scope()
|
||||
|
||||
def resolve(self, ref):
|
||||
"""
|
||||
Context manager which resolves a JSON ``ref`` and enters the
|
||||
resolution scope of this ref.
|
||||
|
@ -277,26 +318,20 @@ class RefResolver(object):
|
|||
:argument str ref: reference to resolve
|
||||
|
||||
"""
|
||||
url = self._urljoin_cache(self.resolution_scope, ref)
|
||||
return url, self._resolve_cache(url)
|
||||
|
||||
full_uri = urljoin(self.resolution_scope, ref)
|
||||
uri, fragment = urldefrag(full_uri)
|
||||
if not uri:
|
||||
uri = self.base_uri
|
||||
|
||||
if uri in self.store:
|
||||
document = self.store[uri]
|
||||
else:
|
||||
def resolve_from_url(self, url):
|
||||
url, fragment = urldefrag(url)
|
||||
try:
|
||||
document = self.store[url]
|
||||
except KeyError:
|
||||
try:
|
||||
document = self.resolve_remote(uri)
|
||||
document = self.resolve_remote(url)
|
||||
except Exception as exc:
|
||||
raise RefResolutionError(exc)
|
||||
|
||||
old_base_uri, self.base_uri = self.base_uri, uri
|
||||
try:
|
||||
with self.in_scope(uri):
|
||||
yield self.resolve_fragment(document, fragment)
|
||||
finally:
|
||||
self.base_uri = old_base_uri
|
||||
return self.resolve_fragment(document, fragment)
|
||||
|
||||
def resolve_fragment(self, document, fragment):
|
||||
"""
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
__version__ = "2.5.0-dev"
|
15
setup.py
15
setup.py
|
@ -1,7 +1,12 @@
|
|||
import os.path
|
||||
from setuptools import setup
|
||||
import sys
|
||||
|
||||
from jsonschema import __version__
|
||||
|
||||
# Load __version__ info globals without importing anything
|
||||
with open(
|
||||
os.path.join(os.path.dirname(__file__), 'jsonschema', 'version.py')
|
||||
) as fh:
|
||||
exec(fh.read())
|
||||
|
||||
with open("README.rst") as readme:
|
||||
long_description = readme.read()
|
||||
|
@ -21,6 +26,11 @@ classifiers = [
|
|||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
]
|
||||
|
||||
install_requires = []
|
||||
|
||||
if sys.version_info < (3, 2):
|
||||
install_requires.append('repoze.lru >= 0.6')
|
||||
|
||||
setup(
|
||||
name="jsonschema",
|
||||
version=__version__,
|
||||
|
@ -34,4 +44,5 @@ setup(
|
|||
long_description=long_description,
|
||||
url="http://github.com/Julian/jsonschema",
|
||||
entry_points={"console_scripts": ["jsonschema = jsonschema.cli:main"]},
|
||||
install_requires=install_requires,
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue