utils: add JSON flattening helpers (#37482)

* flatten/unflatten JSON document
* flatten JSON schema (to help users produce flattened JSON
  documents, not to validate them; validation must be done by
  unflattening, then validating against the original JSON schema)
This commit is contained in:
Benjamin Dauvergne 2019-10-15 12:00:08 +02:00
parent a3ad8de720
commit d4d3e59e3d
17 changed files with 356 additions and 11 deletions

View File

@ -4,7 +4,7 @@ from django.http import Http404
from django.views.generic.base import View
from django.views.generic.detail import SingleObjectMixin, DetailView
from passerelle import utils
import passerelle.utils as utils
from .models import Bdp

View File

@ -3,7 +3,7 @@ import json
from django.views.generic.base import View
from django.views.generic.detail import SingleObjectMixin, DetailView
from passerelle import utils
import passerelle.utils as utils
from .models import ClicRdv

View File

@ -5,7 +5,7 @@ from django.http import Http404
from django.views.generic.base import View
from django.views.generic.detail import SingleObjectMixin, DetailView
from passerelle import utils
import passerelle.utils as utils
from .models import Gdc, phpserialize, phpserialize_loads, SOAPpy

View File

@ -4,7 +4,7 @@ from django.views.generic.base import View
from django.views.generic.detail import SingleObjectMixin, DetailView
from django.views.generic.edit import UpdateView
from passerelle import utils
import passerelle.utils as utils
from .models import Pastell
from .forms import PastellTypeForm, PastellFieldsForm

View File

@ -26,7 +26,7 @@ from django.http import HttpResponse, HttpResponseBadRequest, Http404, HttpRespo
from django.utils.translation import ugettext_lazy as _
from django.utils.http import urlencode
from passerelle import utils
import passerelle.utils as utils
from .models import AgoraPlus, AgoraPlusLink, AgoraAPIError
from .wcs import Formdata

View File

@ -16,7 +16,7 @@
from django.views.generic import DetailView as GenericDetailView
from passerelle import utils
import passerelle.utils as utils
from .models import FakeFamily

View File

@ -24,7 +24,7 @@ from django.views.generic import DetailView as GenericDetailView
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from passerelle import utils
import passerelle.utils as utils
from passerelle.soap import sudsobject_to_dict, client_to_jsondict
from .soap import get_client

View File

@ -19,7 +19,7 @@ import json
from django.views.generic import DetailView as GenericDetailView, View
from django.views.decorators.csrf import csrf_exempt
from passerelle import utils
import passerelle.utils as utils
from .models import MeyzieuNewsletters

View File

@ -20,7 +20,7 @@ from django.views.generic import DetailView as GenericDetailView
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from passerelle import utils
import passerelle.utils as utils
from passerelle.soap import sudsobject_to_dict, client_to_jsondict
from .soap import get_client

View File

@ -20,7 +20,7 @@ from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from django.utils.translation import ugettext_lazy as _
from passerelle import utils
import passerelle.utils as utils
from .models import SolisAPA

View File

@ -13,10 +13,11 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
from cStringIO import StringIO
from functools import wraps
import hashlib
import json
import re
from itertools import islice, chain
import warnings
@ -39,6 +40,8 @@ from passerelle.base.signature import check_query, check_url
def response_for_json(request, data):
import json
response = HttpResponse(content_type='application/json')
json_str = json.dumps(data)
for variable in ('jsonpCallback', 'callback'):

View File

@ -14,6 +14,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import inspect
from django.core.urlresolvers import reverse

156
passerelle/utils/json.py Normal file
View File

@ -0,0 +1,156 @@
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2019 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2018 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from django.utils import six
# Default separator between path segments in flattened keys.
FLATTEN_SEPARATOR = '/'


def unflatten(d, separator=FLATTEN_SEPARATOR):
    '''Rebuild a nested structure from a flattened dict.

    Transform:

       {"a/b/0/x": "1234"}

    into:

       {"a": {"b": [{"x": "1234"}]}}

    Every key of `d` is split on `separator`; segments made only of digits
    are array indexes, every other segment is an object key. The type of
    the first segment (of the smallest key) decides whether the result is
    a list or a dict.

    Any value which is not a non-empty dict is returned unchanged.

    Raises ValueError if an array index is skipped ("incomplete array") or
    if the same position is used both as an array and as an object
    ("incoherent keys").
    '''
    if not isinstance(d, dict) or not d:  # unflattening an empty dict makes no sense
        return d

    # ok d is a dict

    def map_digits(segments):
        return [int(x) if x.isdigit() else x for x in segments]

    def sort_key(item):
        path, orig_key = item
        # Tag each segment with its kind so that integers are compared
        # numerically, strings lexically, and an int is never compared
        # with a str (which raises TypeError on Python 3). Integers sort
        # first, as they did with Python 2 mixed-type ordering.
        tagged = [(0, x) if isinstance(x, int) else (1, x) for x in path]
        return tagged, orig_key

    keys = sorted(
        ((map_digits(key.split(separator)), key) for key in d),
        key=sort_key)

    def set_path(path, orig_key, container, value):
        last = len(path) - 1
        for i, key in enumerate(path):
            is_index = isinstance(key, int)
            # An index can only address a list and a name can only
            # address a dict; anything else means the keys disagree on
            # the structure.
            if not isinstance(container, list if is_index else dict):
                raise ValueError('incoherent keys, %s is used both as an array and as an object in %s' % (
                    separator.join('%s' % x for x in path[:i]),
                    orig_key))

            if i == last:  # end of path, set the value
                if is_index:
                    # keys are sorted, so the value must land exactly at
                    # the end of the array
                    if len(container) != key:
                        raise ValueError('incomplete array before %s' % orig_key)
                    container.append(value)
                else:
                    container[key] = value
                return

            # the kind of the next segment decides what gets created here
            child = [] if isinstance(path[i + 1], int) else {}
            if is_index:
                if len(container) < key:
                    raise ValueError('incomplete array before %s in %s' % (
                        separator.join('%s' % x for x in path[:i + 1]),
                        orig_key))
                elif len(container) == key:
                    container.append(child)
                else:
                    child = container[key]
            else:
                child = container.setdefault(key, child)
            container = child

    # Is the first level an array or a dict ?
    if isinstance(keys[0][0][0], int):
        new = []
    else:
        new = {}
    for path, key in keys:
        set_path(path, key, new, d[key])
    return new
def flatten(data, separator=FLATTEN_SEPARATOR):
    '''Flatten nested lists and dicts into a single-level dict.

    Transform:

       {"a": {"b": [{"x": "1234"}]}}

    into:

       {"a/b/0/x": "1234"}

    Each key of the result is the path to a leaf value: dict keys and
    list indexes converted to text and joined with `separator`. Empty
    lists and dicts contain no leaf, so they produce no key at all. The
    separator is not escaped when it appears inside a dict key.

    `data` must be a list or a dict.
    '''
    assert isinstance(data, (list, dict))

    def helper(node):
        if isinstance(node, list):
            children = enumerate(node)
        elif isinstance(node, dict):
            children = node.items()
        else:
            # leaf value: the path is completed by the callers
            yield [], node
            return
        for key, child in children:
            # '%s' formatting keeps text keys untouched on Python 2,
            # where str() fails on non-ASCII unicode keys
            segment = '%s' % (key,)
            for path, value in helper(child):
                yield [segment] + path, value

    return {separator.join(path): value for path, value in helper(data)}
def flatten_json_schema(schema, separator=FLATTEN_SEPARATOR, default_max_items=3):
    '''Describe the flattened form of documents matching `schema`.

    The result is an object schema whose properties are the flattened
    keys (path segments joined with `separator`) mapped to the schema of
    the corresponding leaf. It only helps users produce flattened
    documents; it must never be used for validation.

    - object schemas contribute one path segment per declared property;
    - array schemas contribute the indexes 0 .. maxItems - 1, with
      `default_max_items` used when the array declares no `maxItems`;
    - `oneOf` alternatives are flattened separately; a key produced by
      several alternatives gets a `oneOf` of the leaf schemas found;
    - anything else is a leaf and is copied as is. This includes schemas
      without `type`, objects without `properties`, arrays whose `items`
      is not a single schema, and boolean schemas.
    '''
    assert isinstance(schema, dict)

    def helper(prefix, node):
        if not isinstance(node, dict):
            # boolean schema (or unexpected value), nothing to descend into
            yield prefix, node
            return

        if 'oneOf' in node:
            leaves_by_key = {}
            for alternative in node['oneOf']:
                for key, leaf in helper(prefix, alternative):
                    leaves_by_key.setdefault(key, []).append(leaf)
            for key, leaves in leaves_by_key.items():
                if len(leaves) > 1:
                    yield key, {'oneOf': leaves}
                else:
                    yield key, leaves[0]
            return

        node_type = node.get('type')
        if node_type == 'array' and isinstance(node.get('items'), dict):
            max_items = node.get('maxItems', default_max_items)
            children = [(str(i), node['items']) for i in range(max_items)]
        elif node_type == 'object' and isinstance(node.get('properties'), dict):
            children = list(node['properties'].items())
        else:
            yield prefix, node
            return

        # no leading separator for first-level keys
        head = prefix + separator if prefix else prefix
        for name, subschema in children:
            for key, leaf in helper(name, subschema):
                yield '%s%s' % (head, key), leaf

    return {
        'type': 'object',
        'description': 'flattened schema *never* use for validation',
        'properties': {
            key: leaf for key, leaf in helper('', schema)
        },
        'additionalProperties': False,
    }

View File

@ -2,6 +2,8 @@
# django-jsonresponse (https://github.com/jjay/django-jsonresponse) distributed
# under BSD license
from __future__ import absolute_import
import datetime
import json
import functools

View File

@ -14,6 +14,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import os
import re
import io

View File

@ -14,6 +14,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import collections
import base64

179
tests/test_utils_json.py Normal file
View File

@ -0,0 +1,179 @@
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2018 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2018 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import pytest
import jsonschema
from passerelle.utils.json import flatten, unflatten, flatten_json_schema, FLATTEN_SEPARATOR as SEP
def test_unflatten_base():
    '''Values which are not a non-empty dict come back unchanged.'''
    for value in ('', 'a', [], [1], {}, 0, 1):
        assert unflatten(value) == value
    # booleans must come back as the very same singletons
    assert unflatten(False) is False
    assert unflatten(True) is True
def test_unflatten_dict():
    '''Keys starting with a name build a dict, whatever their order.'''
    flat = {
        SEP.join(('a', 'b', '0')): 1,
        SEP.join(('a', 'c', '1')): 'a',
        SEP.join(('a', 'b', '1')): True,
        SEP.join(('a', 'c', '0')): [1],
    }
    expected = {
        'a': {
            'b': [1, True],
            'c': [[1], 'a'],
        }
    }
    assert unflatten(flat) == expected
def test_unflatten_array():
    '''Keys starting with an index build a list, whatever their order.'''
    flat = {
        SEP.join(('0', 'b', '0')): 1,
        SEP.join(('1', 'c', '1')): 'a',
        SEP.join(('0', 'b', '1')): True,
        SEP.join(('1', 'c', '0')): [1],
    }
    expected = [
        {'b': [1, True]},
        {'c': [[1], 'a']},
    ]
    assert unflatten(flat) == expected
def test_unflatten_missing_final_index():
    '''A top-level array starting at index 1 is rejected.'''
    flat = {'1': 1}
    with pytest.raises(ValueError) as exc_info:
        unflatten(flat)
    message = exc_info.value.args[0]
    assert 'incomplete' in message
def test_unflatten_missing_intermediate_index():
    '''A nested array starting at index 1 is rejected.'''
    flat = {SEP.join(('a', '1', 'b')): 1}
    with pytest.raises(ValueError) as exc_info:
        unflatten(flat)
    message = exc_info.value.args[0]
    assert 'incomplete' in message
def test_flatten_array_schema():
    '''Flatten an array-of-objects schema and round-trip matching data.'''
    string_schema = {'type': 'string'}
    item_schema = {
        'type': 'object',
        'properties': {
            'a': {'type': 'string'},
            'b': {'type': 'integer'},
            'c': {
                'type': 'array',
                'items': {'type': 'integer'},
            },
        },
        'additionalProperties': False,
    }
    schema = {'type': 'array', 'items': item_schema}
    flattened_schema = flatten_json_schema(schema)

    data = [{'a': 'a', 'b': 1, 'c': [1, 2, 3]} for _ in range(3)]
    flattened_data = flatten(data)

    jsonschema.validate(schema=schema, instance=data)

    # no array declares maxItems, so indexes 0 to 2 are expected at both
    # levels
    indexes = ('0', '1', '2')
    expected_properties = {}
    for row in indexes:
        expected_properties[SEP.join((row, 'a'))] = dict(string_schema)
        expected_properties[SEP.join((row, 'b'))] = {'type': 'integer'}
        for col in indexes:
            expected_properties[SEP.join((row, 'c', col))] = {'type': 'integer'}

    assert flattened_schema == {
        'type': 'object',
        'description': 'flattened schema *never* use for validation',
        'properties': expected_properties,
        'additionalProperties': False,
    }

    # This should never be done, as the flattened schema cannot describe
    # every possible array index; it only works here because no array
    # holds more than 3 elements.
    jsonschema.validate(schema=flattened_schema, instance=flattened_data)

    assert data == unflatten(flattened_data)
def test_flatten_dict_schema():
    '''Flatten an object schema holding scalars and an array of integers.'''
    schema = {
        'type': 'object',
        'properties': {
            'a': {'type': 'string'},
            'b': {'type': 'integer'},
            'c': {
                'type': 'array',
                'items': {'type': 'integer'},
            },
        },
    }

    expected_properties = {
        'a': {'type': 'string'},
        'b': {'type': 'integer'},
    }
    # the array declares no maxItems, so indexes 0 to 2 are expected
    for index in ('0', '1', '2'):
        expected_properties[SEP.join(('c', index))] = {'type': 'integer'}

    assert flatten_json_schema(schema) == {
        'type': 'object',
        'description': 'flattened schema *never* use for validation',
        'properties': expected_properties,
        'additionalProperties': False,
    }