Merge remote-tracking branch 'dnephin/perf_cache_resolving'

* dnephin/perf_cache_resolving:
  Use lru_cache
  Remove DefragResult.
  Remove context manager from ref() validation.
  Perf improvements by using a cache.
  Add benchmark script.
  Fix test failures
  issue #158: TRY to speed-up scope & $ref url-handling by keeping fragments separated from URL (and avoid redundant frag/defrag). Conflicts: 	jsonschema/tests/test_benchmarks.py
This commit is contained in:
Julian Berman 2015-03-14 20:03:23 -04:00
commit a959fb1688
No known key found for this signature in database
GPG Key ID: 3F8D9C8C011729F8
8 changed files with 172 additions and 44 deletions

74
bench.py Normal file
View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
"""
Benchmark the performance of jsonschema.
Example benchmark:
wget http://swagger.io/v2/schema.json
wget http://petstore.swagger.io/v2/swagger.json
python bench.py -r 5 schema.json swagger.json
"""
from __future__ import print_function
import argparse
import cProfile
import json
import time
import jsonschema
def parse_args(argv=None):
    """
    Parse the benchmark's command-line arguments.

    :argument list argv: argument strings to parse. When ``None`` (the
        default) ``argparse`` reads them from ``sys.argv[1:]``.
    :returns: a namespace with the attributes ``schema``, ``document``,
        ``repeat`` and ``profile``
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('schema', help="path to a schema used to benchmark")
    parser.add_argument('document', help="document to validate with schema")
    # Default to a single iteration: without a default ``repeat`` would be
    # ``None`` and the timing loop's ``range(repeat)`` would raise TypeError.
    parser.add_argument(
        '-r', '--repeat', type=int, default=1,
        help="number of iterations")
    parser.add_argument(
        '--profile',
        help="Enable profiling, write profile to this filepath")
    return parser.parse_args(argv)
def run(filename, schema, document):
    """
    Validate ``document`` against ``schema`` once.

    A fresh ``RefResolver`` is built on every call, using a ``file://`` URI
    made from ``filename`` as its base URI.

    :argument str filename: path of the schema file, used for the base URI
    :argument dict schema: the loaded schema
    :argument document: the loaded document to validate
    """
    # Not every schema declares an ``id``; only pre-seed the resolver's store
    # under that key when it exists, instead of failing with a KeyError.
    store = {schema['id']: schema} if 'id' in schema else {}
    resolver = jsonschema.RefResolver(
        'file://{0}'.format(filename),
        schema,
        store=store)
    jsonschema.validate(document, schema, resolver=resolver)
def format_time(time_):
    """
    Render a duration given in seconds as milliseconds.

    :argument float time_: duration in seconds
    :returns: the duration in milliseconds with three decimals, suffixed
        with ``ms``
    """
    milliseconds = time_ * 1000
    return "%.3fms" % milliseconds
def run_timeit(schema_filename, document_filename, repeat, profile):
    """
    Time repeated validations of a document and print the results.

    Prints the sorted per-iteration timings followed by their mean.

    :argument str schema_filename: path to the JSON schema file
    :argument str document_filename: path to the JSON document file
    :argument int repeat: number of validation runs; ``None`` means one run
    :argument str profile: if truthy, a filepath the ``cProfile`` statistics
        are written to
    :raises ValueError: if ``repeat`` is less than 1
    """
    # ``timeit.default_timer`` is the clock the standard library itself uses
    # for benchmarking; imported locally so this block is self-contained.
    from timeit import default_timer

    # Validate up front: ``None`` would break ``range()`` and ``0`` would
    # divide by zero when computing the mean below.
    if repeat is None:
        repeat = 1
    if repeat < 1:
        raise ValueError("repeat must be a positive integer")

    with open(schema_filename) as schema_file:
        schema = json.load(schema_file)

    with open(document_filename) as fh:
        document = json.load(fh)

    # Profiling starts only after the files are loaded, so it covers the
    # validation runs and not the JSON parsing.
    profiler = None
    if profile:
        profiler = cProfile.Profile()
        profiler.enable()

    times = []
    try:
        for _ in range(repeat):
            start_time = default_timer()
            run(schema_filename, schema, document)
            times.append(default_timer() - start_time)
    finally:
        # Never leave the profiler running if a validation run raises.
        if profiler is not None:
            profiler.disable()

    if profiler is not None:
        profiler.dump_stats(profile)

    print(", ".join(map(format_time, sorted(times))))
    print("Mean: {0}".format(format_time(sum(times) / repeat)))
def main():
    """Script entry point: parse the command line and run the benchmark."""
    options = parse_args()
    run_timeit(
        schema_filename=options.schema,
        document_filename=options.document,
        repeat=options.repeat,
        profile=options.profile,
    )


# Only run the benchmark when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -19,8 +19,6 @@ from jsonschema.validators import (
Draft3Validator, Draft4Validator, RefResolver, validate
)
__version__ = "2.5.0-dev"
from jsonschema.version import __version__
# flake8: noqa

View File

@ -190,9 +190,14 @@ def enum(validator, enums, instance, schema):
def ref(validator, ref, instance, schema):
with validator.resolver.resolving(ref) as resolved:
scope, resolved = validator.resolver.resolve(ref)
validator.resolver.push_scope(scope)
try:
for error in validator.descend(instance, resolved):
yield error
finally:
validator.resolver.pop_scope()
def type_draft3(validator, types, instance, schema):

View File

@ -1,6 +1,8 @@
from __future__ import unicode_literals
import sys
import operator
import sys
try:
from collections import MutableMapping, Sequence # noqa
@ -11,6 +13,7 @@ PY3 = sys.version_info[0] >= 3
if PY3:
zip = zip
from functools import lru_cache
from io import StringIO
from urllib.parse import (
unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit
@ -21,6 +24,7 @@ if PY3:
iteritems = operator.methodcaller("items")
else:
from itertools import izip as zip # noqa
from repoze.lru import lru_cache
from StringIO import StringIO
from urlparse import (
urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa

View File

@ -1,5 +1,4 @@
from collections import deque
from contextlib import contextmanager
import json
from jsonschema import FormatChecker, ValidationError
@ -633,12 +632,8 @@ class ValidatorTestMixin(object):
resolver = RefResolver("", {})
schema = {"$ref" : mock.Mock()}
@contextmanager
def resolving():
yield {"type": "integer"}
with mock.patch.object(resolver, "resolving") as resolve:
resolve.return_value = resolving()
with mock.patch.object(resolver, "resolve") as resolve:
resolve.return_value = "url", {"type": "integer"}
with self.assertRaises(ValidationError):
self.validator_class(schema, resolver=resolver).validate(None)
@ -775,11 +770,11 @@ class TestRefResolver(unittest.TestCase):
self.assertEqual(resolved, self.referrer["properties"]["foo"])
def test_it_resolves_local_refs_with_id(self):
schema = {"id": "foo://bar/schema#", "a": {"foo": "bar"}}
schema = {"id": "http://bar/schema#", "a": {"foo": "bar"}}
resolver = RefResolver.from_schema(schema)
with resolver.resolving("#/a") as resolved:
self.assertEqual(resolved, schema["a"])
with resolver.resolving("foo://bar/schema#/a") as resolved:
with resolver.resolving("http://bar/schema#/a") as resolved:
self.assertEqual(resolved, schema["a"])
def test_it_retrieves_stored_refs(self):
@ -815,7 +810,7 @@ class TestRefResolver(unittest.TestCase):
def test_it_can_construct_a_base_uri_from_a_schema(self):
schema = {"id" : "foo"}
resolver = RefResolver.from_schema(schema)
self.assertEqual(resolver.base_uri, "foo")
self.assertEqual(resolver.resolution_scope, "foo")
with resolver.resolving("") as resolved:
self.assertEqual(resolved, schema)
with resolver.resolving("#") as resolved:
@ -828,7 +823,7 @@ class TestRefResolver(unittest.TestCase):
def test_it_can_construct_a_base_uri_from_a_schema_without_id(self):
schema = {}
resolver = RefResolver.from_schema(schema)
self.assertEqual(resolver.base_uri, "")
self.assertEqual(resolver.resolution_scope, "")
with resolver.resolving("") as resolved:
self.assertEqual(resolved, schema)
with resolver.resolving("#") as resolved:
@ -863,9 +858,7 @@ class TestRefResolver(unittest.TestCase):
)
with resolver.resolving(ref):
pass
with resolver.resolving(ref):
pass
self.assertEqual(foo_handler.call_count, 2)
self.assertEqual(foo_handler.call_count, 1)
def test_if_you_give_it_junk_you_get_a_resolution_error(self):
ref = "foo://bar"
@ -876,6 +869,13 @@ class TestRefResolver(unittest.TestCase):
pass
self.assertEqual(str(err.exception), "Oh no! What's this?")
def test_helpful_error_message_on_failed_pop_scope(self):
resolver = RefResolver("", {})
resolver.pop_scope()
with self.assertRaises(RefResolutionError) as exc:
resolver.pop_scope()
self.assertIn("Failed to pop the scope", str(exc.exception))
def sorted_errors(errors):
def key(error):

View File

@ -12,7 +12,7 @@ except ImportError:
from jsonschema import _utils, _validators
from jsonschema.compat import (
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
str_types, int_types, iteritems,
str_types, int_types, iteritems, lru_cache,
)
from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa
from jsonschema.exceptions import RefResolutionError, SchemaError, UnknownType
@ -79,7 +79,10 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq
if _schema is None:
_schema = self.schema
with self.resolver.in_scope(_schema.get(u"id", u"")):
scope = _schema.get(u"id")
if scope:
self.resolver.push_scope(scope)
try:
ref = _schema.get(u"$ref")
if ref is not None:
validators = [(u"$ref", ref)]
@ -103,6 +106,9 @@ def create(meta_schema, validators=(), version=None, default_types=None): # noq
if k != u"$ref":
error.schema_path.appendleft(k)
yield error
finally:
if scope:
self.resolver.pop_scope()
def descend(self, instance, schema, path=None, schema_path=None):
for error in self.iter_errors(instance, schema):
@ -227,19 +233,22 @@ class RefResolver(object):
first resolution
:argument dict handlers: a mapping from URI schemes to functions that
should be used to retrieve them
:argument callable cache_func: a function decorator used to cache
expensive calls. Should support the `functools.lru_cache` interface.
:argument int cache_maxsize: number of items to store in the cache. Set
this to 0 to disable caching. Defaults to 1000.
"""
def __init__(
self, base_uri, referrer, store=(), cache_remote=True, handlers=(),
cache_func=lru_cache, cache_maxsize=1000,
):
self.base_uri = base_uri
self.resolution_scope = base_uri
# This attribute is not used, it is for backwards compatibility
self.referrer = referrer
self.cache_remote = cache_remote
self.handlers = dict(handlers)
self._scopes_stack = [base_uri]
self.store = _utils.URIDict(
(id, validator.META_SCHEMA)
for id, validator in iteritems(meta_schemas)
@ -247,6 +256,9 @@ class RefResolver(object):
self.store.update(store)
self.store[base_uri] = referrer
self._urljoin_cache = cache_func(cache_maxsize)(urljoin)
self._resolve_cache = cache_func(cache_maxsize)(self.resolve_from_url)
@classmethod
def from_schema(cls, schema, *args, **kwargs):
"""
@ -259,17 +271,46 @@ class RefResolver(object):
return cls(schema.get(u"id", u""), schema, *args, **kwargs)
def push_scope(self, scope):
self._scopes_stack.append(
self._urljoin_cache(self.resolution_scope, scope))
def pop_scope(self):
try:
self._scopes_stack.pop()
except IndexError:
raise RefResolutionError(
"Failed to pop the scope from an empty stack. "
"`pop_scope()` should only be called once for every "
"`push_scope()`")
@property
def resolution_scope(self):
return self._scopes_stack[-1]
# Deprecated, this function is no longer used, but is preserved for
# backwards compatibility
@contextlib.contextmanager
def in_scope(self, scope):
old_scope = self.resolution_scope
self.resolution_scope = urljoin(old_scope, scope)
self.push_scope(scope)
try:
yield
finally:
self.resolution_scope = old_scope
self.pop_scope()
# Deprecated, this function is no longer used, but is preserved for
# backwards compatibility
@contextlib.contextmanager
def resolving(self, ref):
url, resolved = self.resolve(ref)
self.push_scope(url)
try:
yield resolved
finally:
self.pop_scope()
def resolve(self, ref):
"""
Resolve a JSON ``ref`` against the current resolution scope and return
a ``(url, resolved)`` pair: the joined URL and the object it refers to.
@ -277,26 +318,20 @@ class RefResolver(object):
:argument str ref: reference to resolve
"""
url = self._urljoin_cache(self.resolution_scope, ref)
return url, self._resolve_cache(url)
full_uri = urljoin(self.resolution_scope, ref)
uri, fragment = urldefrag(full_uri)
if not uri:
uri = self.base_uri
if uri in self.store:
document = self.store[uri]
else:
def resolve_from_url(self, url):
url, fragment = urldefrag(url)
try:
document = self.store[url]
except KeyError:
try:
document = self.resolve_remote(uri)
document = self.resolve_remote(url)
except Exception as exc:
raise RefResolutionError(exc)
old_base_uri, self.base_uri = self.base_uri, uri
try:
with self.in_scope(uri):
yield self.resolve_fragment(document, fragment)
finally:
self.base_uri = old_base_uri
return self.resolve_fragment(document, fragment)
def resolve_fragment(self, document, fragment):
"""

1
jsonschema/version.py Normal file
View File

@ -0,0 +1 @@
__version__ = "2.5.0-dev"

View File

@ -1,7 +1,12 @@
import os.path
from setuptools import setup
import sys
from jsonschema import __version__
# Load __version__ info globals without importing anything
with open(
os.path.join(os.path.dirname(__file__), 'jsonschema', 'version.py')
) as fh:
exec(fh.read())
with open("README.rst") as readme:
long_description = readme.read()
@ -21,6 +26,11 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
install_requires = []
if sys.version_info < (3, 2):
install_requires.append('repoze.lru >= 0.6')
setup(
name="jsonschema",
version=__version__,
@ -34,4 +44,5 @@ setup(
long_description=long_description,
url="http://github.com/Julian/jsonschema",
entry_points={"console_scripts": ["jsonschema = jsonschema.cli:main"]},
install_requires=install_requires,
)