diff --git a/bench.py b/bench.py
new file mode 100644
index 0000000..e7318ed
--- /dev/null
+++ b/bench.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+"""
+Benchmark the performance of jsonschema.
+
+Example benchmark:
+
+    wget http://swagger.io/v2/schema.json
+    wget http://petstore.swagger.io/v2/swagger.json
+    python bench.py -r 5 schema.json swagger.json
+
+"""
+from __future__ import print_function
+import argparse
+import cProfile
+import json
+import timeit
+
+import jsonschema
+
+
+def parse_args():
+    """Parse the command line; exit with a usage error on bad input."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('schema', help="path to a schema used to benchmark")
+    parser.add_argument('document', help="document to validate with schema")
+    parser.add_argument('-r', '--repeat', type=int, default=1,
+                        help="number of iterations (default: 1)")
+    parser.add_argument(
+        '--profile',
+        help="Enable profiling, write profile to this filepath")
+    args = parser.parse_args()
+    # At least one timed run is needed to compute a mean.
+    if args.repeat < 1:
+        parser.error("--repeat must be at least 1")
+    return args
+
+
+def run(filename, schema, document):
+    """Validate ``document`` against ``schema`` once, with a new resolver."""
+    # Only register the schema under its ``id`` when it declares one;
+    # indexing ``schema['id']`` unconditionally raises KeyError otherwise.
+    store = {schema['id']: schema} if 'id' in schema else {}
+    resolver = jsonschema.RefResolver(
+        'file://{0}'.format(filename),
+        schema,
+        store=store)
+    jsonschema.validate(document, schema, resolver=resolver)
+
+
+def format_time(time_):
+    """Format a duration given in seconds as milliseconds."""
+    return "%.3fms" % (time_ * 1000)
+
+
+def run_timeit(schema_filename, document_filename, repeat, profile):
+    """
+    Time ``repeat`` validations and print the sorted timings and the mean.
+
+    If ``profile`` is set, the runs are profiled and the cProfile stats are
+    written to that path, even when a run raises.
+    """
+    with open(schema_filename) as schema_file:
+        schema = json.load(schema_file)
+
+    with open(document_filename) as fh:
+        document = json.load(fh)
+
+    if profile:
+        profiler = cProfile.Profile()
+        profiler.enable()
+
+    times = []
+    try:
+        for _ in range(repeat):
+            start_time = timeit.default_timer()
+            run(schema_filename, schema, document)
+            times.append(timeit.default_timer() - start_time)
+    finally:
+        if profile:
+            profiler.disable()
+            profiler.dump_stats(profile)
+
+    print(", ".join(map(format_time, sorted(times))))
+    print("Mean: {0}".format(format_time(sum(times) / len(times))))
+
+
+def main():
+    """Entry point: parse arguments and run the benchmark."""
+    args = parse_args()
+    run_timeit(args.schema, args.document, args.repeat, args.profile)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/jsonschema/__init__.py 
b/jsonschema/__init__.py index 16c9843..e23168d 100644 --- a/jsonschema/__init__.py +++ b/jsonschema/__init__.py @@ -19,8 +19,6 @@ from jsonschema.validators import ( Draft3Validator, Draft4Validator, RefResolver, validate ) - -__version__ = "2.5.0-dev" - +from jsonschema.version import __version__ # flake8: noqa diff --git a/jsonschema/_validators.py b/jsonschema/_validators.py index 7e5956d..a51681e 100644 --- a/jsonschema/_validators.py +++ b/jsonschema/_validators.py @@ -190,9 +190,14 @@ def enum(validator, enums, instance, schema): def ref(validator, ref, instance, schema): - with validator.resolver.resolving(ref) as resolved: + scope, resolved = validator.resolver.resolve(ref) + validator.resolver.push_scope(scope) + + try: for error in validator.descend(instance, resolved): yield error + finally: + validator.resolver.pop_scope() def type_draft3(validator, types, instance, schema): diff --git a/jsonschema/compat.py b/jsonschema/compat.py index 6ca49ab..9f52ded 100644 --- a/jsonschema/compat.py +++ b/jsonschema/compat.py @@ -1,6 +1,8 @@ from __future__ import unicode_literals -import sys + import operator +import sys + try: from collections import MutableMapping, Sequence # noqa @@ -11,6 +13,7 @@ PY3 = sys.version_info[0] >= 3 if PY3: zip = zip + from functools import lru_cache from io import StringIO from urllib.parse import ( unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit @@ -21,6 +24,7 @@ if PY3: iteritems = operator.methodcaller("items") else: from itertools import izip as zip # noqa + from repoze.lru import lru_cache from StringIO import StringIO from urlparse import ( urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 2b14372..1f03294 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1,5 +1,4 @@ from collections import deque -from contextlib import contextmanager import json from 
jsonschema import FormatChecker, ValidationError @@ -633,12 +632,8 @@ class ValidatorTestMixin(object): resolver = RefResolver("", {}) schema = {"$ref" : mock.Mock()} - @contextmanager - def resolving(): - yield {"type": "integer"} - - with mock.patch.object(resolver, "resolving") as resolve: - resolve.return_value = resolving() + with mock.patch.object(resolver, "resolve") as resolve: + resolve.return_value = "url", {"type": "integer"} with self.assertRaises(ValidationError): self.validator_class(schema, resolver=resolver).validate(None) @@ -775,11 +770,11 @@ class TestRefResolver(unittest.TestCase): self.assertEqual(resolved, self.referrer["properties"]["foo"]) def test_it_resolves_local_refs_with_id(self): - schema = {"id": "foo://bar/schema#", "a": {"foo": "bar"}} + schema = {"id": "http://bar/schema#", "a": {"foo": "bar"}} resolver = RefResolver.from_schema(schema) with resolver.resolving("#/a") as resolved: self.assertEqual(resolved, schema["a"]) - with resolver.resolving("foo://bar/schema#/a") as resolved: + with resolver.resolving("http://bar/schema#/a") as resolved: self.assertEqual(resolved, schema["a"]) def test_it_retrieves_stored_refs(self): @@ -815,7 +810,7 @@ class TestRefResolver(unittest.TestCase): def test_it_can_construct_a_base_uri_from_a_schema(self): schema = {"id" : "foo"} resolver = RefResolver.from_schema(schema) - self.assertEqual(resolver.base_uri, "foo") + self.assertEqual(resolver.resolution_scope, "foo") with resolver.resolving("") as resolved: self.assertEqual(resolved, schema) with resolver.resolving("#") as resolved: @@ -828,7 +823,7 @@ class TestRefResolver(unittest.TestCase): def test_it_can_construct_a_base_uri_from_a_schema_without_id(self): schema = {} resolver = RefResolver.from_schema(schema) - self.assertEqual(resolver.base_uri, "") + self.assertEqual(resolver.resolution_scope, "") with resolver.resolving("") as resolved: self.assertEqual(resolved, schema) with resolver.resolving("#") as resolved: @@ -863,9 +858,7 @@ class 
TestRefResolver(unittest.TestCase): ) with resolver.resolving(ref): pass - with resolver.resolving(ref): - pass - self.assertEqual(foo_handler.call_count, 2) + self.assertEqual(foo_handler.call_count, 1) def test_if_you_give_it_junk_you_get_a_resolution_error(self): ref = "foo://bar" @@ -876,6 +869,13 @@ class TestRefResolver(unittest.TestCase): pass self.assertEqual(str(err.exception), "Oh no! What's this?") + def test_helpful_error_message_on_failed_pop_scope(self): + resolver = RefResolver("", {}) + resolver.pop_scope() + with self.assertRaises(RefResolutionError) as exc: + resolver.pop_scope() + self.assertIn("Failed to pop the scope", str(exc.exception)) + def sorted_errors(errors): def key(error): diff --git a/jsonschema/validators.py b/jsonschema/validators.py index c347bf1..c84a3db 100644 --- a/jsonschema/validators.py +++ b/jsonschema/validators.py @@ -12,7 +12,7 @@ except ImportError: from jsonschema import _utils, _validators from jsonschema.compat import ( Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen, - str_types, int_types, iteritems, + str_types, int_types, iteritems, lru_cache, ) from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa from jsonschema.exceptions import RefResolutionError, SchemaError, UnknownType @@ -79,7 +79,10 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq if _schema is None: _schema = self.schema - with self.resolver.in_scope(_schema.get(u"id", u"")): + scope = _schema.get(u"id") + if scope: + self.resolver.push_scope(scope) + try: ref = _schema.get(u"$ref") if ref is not None: validators = [(u"$ref", ref)] @@ -103,6 +106,9 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq if k != u"$ref": error.schema_path.appendleft(k) yield error + finally: + if scope: + self.resolver.pop_scope() def descend(self, instance, schema, path=None, schema_path=None): for error in self.iter_errors(instance, schema): @@ -227,19 +233,22 @@ class 
RefResolver(object): first resolution :argument dict handlers: a mapping from URI schemes to functions that should be used to retrieve them - + :argument callable cache_func: a function decorator used to cache + expensive calls. Should support the `functools.lru_cache` interface. + :argument int cache_maxsize: number of items to store in the cache. Set + this to 0 to disable caching. Defaults to 1000. """ def __init__( self, base_uri, referrer, store=(), cache_remote=True, handlers=(), + cache_func=lru_cache, cache_maxsize=1000, ): - self.base_uri = base_uri - self.resolution_scope = base_uri # This attribute is not used, it is for backwards compatibility self.referrer = referrer self.cache_remote = cache_remote self.handlers = dict(handlers) + self._scopes_stack = [base_uri] self.store = _utils.URIDict( (id, validator.META_SCHEMA) for id, validator in iteritems(meta_schemas) @@ -247,6 +256,9 @@ class RefResolver(object): self.store.update(store) self.store[base_uri] = referrer + self._urljoin_cache = cache_func(cache_maxsize)(urljoin) + self._resolve_cache = cache_func(cache_maxsize)(self.resolve_from_url) + @classmethod def from_schema(cls, schema, *args, **kwargs): """ @@ -259,17 +271,46 @@ class RefResolver(object): return cls(schema.get(u"id", u""), schema, *args, **kwargs) + def push_scope(self, scope): + self._scopes_stack.append( + self._urljoin_cache(self.resolution_scope, scope)) + + def pop_scope(self): + try: + self._scopes_stack.pop() + except IndexError: + raise RefResolutionError( + "Failed to pop the scope from an empty stack. 
" + "`pop_scope()` should only be called once for every " + "`push_scope()`") + + @property + def resolution_scope(self): + return self._scopes_stack[-1] + + + # Deprecated, this function is no longer used, but is preserved for + # backwards compatibility @contextlib.contextmanager def in_scope(self, scope): - old_scope = self.resolution_scope - self.resolution_scope = urljoin(old_scope, scope) + self.push_scope(scope) try: yield finally: - self.resolution_scope = old_scope + self.pop_scope() + # Deprecated, this function is no longer used, but is preserved for + # backwards compatibility @contextlib.contextmanager def resolving(self, ref): + url, resolved = self.resolve(ref) + self.push_scope(url) + try: + yield resolved + finally: + self.pop_scope() + + def resolve(self, ref): """ Context manager which resolves a JSON ``ref`` and enters the resolution scope of this ref. @@ -277,26 +318,20 @@ class RefResolver(object): :argument str ref: reference to resolve """ + url = self._urljoin_cache(self.resolution_scope, ref) + return url, self._resolve_cache(url) - full_uri = urljoin(self.resolution_scope, ref) - uri, fragment = urldefrag(full_uri) - if not uri: - uri = self.base_uri - - if uri in self.store: - document = self.store[uri] - else: + def resolve_from_url(self, url): + url, fragment = urldefrag(url) + try: + document = self.store[url] + except KeyError: try: - document = self.resolve_remote(uri) + document = self.resolve_remote(url) except Exception as exc: raise RefResolutionError(exc) - old_base_uri, self.base_uri = self.base_uri, uri - try: - with self.in_scope(uri): - yield self.resolve_fragment(document, fragment) - finally: - self.base_uri = old_base_uri + return self.resolve_fragment(document, fragment) def resolve_fragment(self, document, fragment): """ diff --git a/jsonschema/version.py b/jsonschema/version.py new file mode 100644 index 0000000..9509105 --- /dev/null +++ b/jsonschema/version.py @@ -0,0 +1 @@ +__version__ = "2.5.0-dev" diff --git 
a/setup.py b/setup.py index 6b47714..ffd61f1 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,12 @@ +import os.path from setuptools import setup +import sys -from jsonschema import __version__ - +# Load __version__ info globals without importing anything +with open( + os.path.join(os.path.dirname(__file__), 'jsonschema', 'version.py') +) as fh: + exec(fh.read()) with open("README.rst") as readme: long_description = readme.read() @@ -21,6 +26,11 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] +install_requires = [] + +if sys.version_info < (3, 2): + install_requires.append('repoze.lru >= 0.6') + setup( name="jsonschema", version=__version__, @@ -34,4 +44,5 @@ setup( long_description=long_description, url="http://github.com/Julian/jsonschema", entry_points={"console_scripts": ["jsonschema = jsonschema.cli:main"]}, + install_requires=install_requires, )