# passerelle/passerelle/utils/zip.py (295 lines, 10 KiB, Python)

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2019 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import, unicode_literals
import difflib
import io
import json
import os.path
import re
import xml.etree.ElementTree as ET
import zipfile
from django.template import Context, Template, TemplateDoesNotExist, TemplateSyntaxError, engines
from django.template.loader import get_template
from django.utils.encoding import force_str
from django.utils.functional import cached_property
from django.utils.six import python_2_unicode_compatible
from jsonschema import ValidationError, validate
from passerelle.utils.files import atomic_write
def _part_schema(content_key):
    """Build the schema of one part entry whose content comes from *content_key*.

    A part is an object with exactly two string properties: ``name_template``
    and *content_key*; both are required and no other property is allowed.
    """
    string_schema = {'type': 'string'}
    return {
        'type': 'object',
        'required': ['name_template', content_key],
        'additionalProperties': False,
        'properties': {
            'name_template': dict(string_schema),
            content_key: dict(string_schema),
        },
    }


# JSON schema of a zip manifest: a mandatory ``name_template`` string and an
# optional ``part_templates`` array whose items carry either a
# ``template_path`` or a ``content_expression`` (exactly one of the two).
SCHEMA = {
    'type': 'object',
    'required': ['name_template'],
    'properties': {
        'name_template': {'type': 'string'},
        'part_templates': {
            'type': 'array',
            'items': {
                'oneOf': [
                    _part_schema('template_path'),
                    _part_schema('content_expression'),
                ]
            },
        },
    },
}
class ZipTemplateError(Exception):
    """Base class of the errors raised while loading or rendering a zip template."""


class ZipTemplateDoesNotExist(ZipTemplateError):
    """A manifest or a part template could not be found."""


class ZipTemplateSyntaxError(ZipTemplateError):
    """A name template, a part template or a rendered XML part is malformed."""
# Matches the opening of a template variable tag: "{{", any number of spaces,
# then captures the run of word characters that follows (possibly empty).
# NOTE(review): not referenced anywhere in the visible part of this module.
VARIABLE_RE = re.compile(r'{{ *(\w*)')
@python_2_unicode_compatible
class ZipPart(object):
    """One member of the archive described by a :class:`ZipTemplate`.

    A part has a templated name and exactly one content source:

    * ``template_path``: a Django template rendered with the zip template's
      context, or
    * ``content_expression``: a key looked up in that context, whose value is
      used as is.
    """

    def __init__(self, zip_template, name_template, template_path=None, content_expression=None):
        """Store the part description.

        :param zip_template: owning object; must expose ``ctx`` and ``base_path``.
        :param name_template: Django template source of the member name.
        :param template_path: path of the content template, relative to the
            manifest's directory.
        :param content_expression: context key holding the content.
        :raises AssertionError: unless exactly one of ``template_path`` and
            ``content_expression`` is truthy.
        """
        self.zip_template = zip_template
        self._name_template = name_template
        self.template_path = template_path
        self.content_expression = content_expression
        assert bool(self.template_path) ^ bool(
            self.content_expression
        ), 'template_path and content_expression are mutually excluded'

    @property
    def ctx(self):
        """Rendering context, shared with the owning zip template."""
        return self.zip_template.ctx

    @property
    def base_path(self):
        """Directory that ``template_path`` is resolved against."""
        return self.zip_template.base_path

    @property
    def template(self):
        """Compiled content template of a ``template_path`` part.

        An absolute resolved path is read straight from the filesystem; any
        other path goes through Django's template loaders. The template is
        compiled again on every access.

        :raises ZipTemplateDoesNotExist: the template cannot be found.
        :raises ZipTemplateSyntaxError: the template does not compile.
        """
        # The guard must look at template_path: name_template is always
        # truthy and would compile the name template as a side effect.
        assert self.template_path, 'not a template_path part'
        template_path = os.path.join(self.base_path, self.template_path)
        if template_path.startswith('/'):
            if not os.path.exists(template_path):
                # No underlying exception exists on this path, so only the
                # message is attached.
                raise ZipTemplateDoesNotExist('part template %s not found' % template_path)
            try:
                with open(template_path) as fd:
                    return Template(fd.read())
            except TemplateSyntaxError as e:
                raise ZipTemplateSyntaxError('syntax error in part template %s' % template_path, e)
        try:
            return get_template(template_path).template
        except TemplateSyntaxError as e:
            raise ZipTemplateSyntaxError('syntax error in part template %s' % template_path, e)
        except TemplateDoesNotExist as e:
            raise ZipTemplateDoesNotExist('part template %s not found' % template_path, e)

    @property
    def name_template(self):
        """Compiled template of the member name.

        :raises ZipTemplateSyntaxError: the name template does not compile.
        """
        try:
            return Template(self._name_template)
        except TemplateSyntaxError as e:
            raise ZipTemplateSyntaxError('syntax error in part\'s name template %s' % self, e)

    def _render(self, template):
        """Render *template* with the shared context, localization disabled."""
        return template.render(Context(self.ctx, use_l10n=False))

    @property
    def content(self):
        """Content of the member.

        The rendered template for a ``template_path`` part, otherwise the
        context value stored under ``content_expression`` (a missing key
        propagates whatever the context mapping raises).
        """
        if self.template_path:
            return self._render(self.template)
        return self.ctx[self.content_expression]

    @property
    def name(self):
        """Rendered member name."""
        return self._render(self.name_template)

    def __str__(self):
        s = '<{0.__class__.__name__} name_template={0._name_template}'
        if self.template_path:
            s += ' template_path={0.template_path!r}'
        else:
            s += ' content_expression={0.content_expression!r}'
        s += '>'
        return s.format(self)
class ZipTemplate(object):
    """ZIP archive described by a JSON manifest and rendered from a context.

    The manifest is validated against ``SCHEMA``. It provides the archive's
    ``name_template`` and an optional ``part_templates`` list, each entry of
    which becomes a :class:`ZipPart`.
    """

    def __init__(self, manifest, ctx=None):
        """Locate, load and validate the manifest.

        :param manifest: an absolute path, used as is, or a name searched
            through the template loaders of every configured Django engine.
        :param ctx: mapping used to render names and parts; a falsy value is
            replaced by an empty dict.
        :raises ZipTemplateDoesNotExist: a non-absolute manifest was not found
            by any loader.
        :raises ZipTemplateError: the manifest is not valid JSON or does not
            conform to ``SCHEMA``.
        """
        if manifest.startswith('/'):
            # Not checked for existence here: a missing file surfaces as the
            # error raised by open() when the manifest is loaded below.
            path = manifest
        else:
            path = self._find_manifest(manifest)
        if not path:
            raise ZipTemplateDoesNotExist('manifest %s not found' % manifest)
        # Part template paths are resolved relative to the manifest *name*,
        # not to the filesystem location that was found for it.
        self.base_path = os.path.dirname(manifest)
        self.manifest_path = path
        try:
            content = self.manifest
        except ValueError as e:
            raise ZipTemplateError('invalid manifest file %s' % path, e)
        try:
            validate(content, SCHEMA)
        except ValidationError as e:
            raise ZipTemplateError('invalid manifest file %s' % path, e)
        self.ctx = ctx or {}

    @staticmethod
    def _find_manifest(manifest):
        """Return the first existing file the template loaders offer for *manifest*, or None."""
        for engine in engines.all():
            for loader in engine.engine.template_loaders:
                for origin in loader.get_template_sources(manifest):
                    if os.path.exists(origin.name):
                        return origin.name
        return None

    @cached_property
    def manifest(self):
        """Parsed JSON content of the manifest file (loaded once, then cached)."""
        with open(self.manifest_path) as fd:
            return json.load(fd)

    @property
    def name_template(self):
        """Compiled template of the archive name.

        :raises ZipTemplateSyntaxError: the name template does not compile.
        """
        try:
            return Template(self.manifest['name_template'])
        except TemplateSyntaxError as e:
            raise ZipTemplateSyntaxError('syntax error in zip name_template', e)

    @property
    def name(self):
        """Archive file name rendered from the context."""
        return self.name_template.render(Context(self.ctx))

    @property
    def parts(self):
        """Yield one :class:`ZipPart` per ``part_templates`` entry, in manifest order."""
        for part_template in self.manifest.get('part_templates', []):
            yield ZipPart(zip_template=self, **part_template)

    @property
    def rendered_parts(self):
        """Yield ``(name, content)`` for every part.

        Parts whose rendered name ends with ``.xml`` are parsed first so that
        malformed XML is reported instead of being archived.

        :raises ZipTemplateSyntaxError: an ``.xml`` part is not well-formed.
        """
        for zip_part in self.parts:
            name = zip_part.name
            # Render once: the value that was validated is the value yielded.
            content = zip_part.content
            if name.endswith('.xml'):
                try:
                    ET.fromstring(force_str(content))
                except ET.ParseError as e:
                    raise ZipTemplateSyntaxError('XML syntax error in part template %s' % zip_part, e)
            yield name, content

    def render_to_bytes(self):
        """Return the rendered archive as a bytes object."""
        with io.BytesIO() as buf:
            self.render_to_file(buf)
            return buf.getvalue()

    def render_to_file(self, filelike):
        """Write the rendered archive to the binary file object *filelike*."""
        with zipfile.ZipFile(filelike, 'w') as zi:
            for name, content in self.rendered_parts:
                zi.writestr(name, force_str(content))

    def render_to_path(self, path, tmp_dir=None):
        """Write the archive into directory *path*, under the rendered name.

        The file is produced through ``atomic_write``; *tmp_dir* is forwarded
        to it as ``dir``.
        """
        full_path = os.path.join(str(path), self.name)
        with atomic_write(full_path, dir=tmp_dir) as fd:
            self.render_to_file(fd)
def diff_zip(one, two):
    """Compare two ZIP archives and describe how they differ.

    :param one: path of the first archive, or a binary file object.
    :param two: path of the second archive, or a binary file object.
    :returns: a list of differences, empty when the archives hold the same
        members with the same content in the same order. Each item is a
        string, except for differing ``.xml``/``.json``/``.txt`` members,
        which are reported as a nested list: a header line followed by the
        ``difflib.ndiff`` output.

    Both sources are closed on return, including file objects passed in by
    the caller.
    """

    def as_lines(content):
        # Members are read as bytes, but difflib.ndiff only handles text:
        # on Python 3 it raises TypeError as soon as a line is replaced.
        if isinstance(content, bytes):
            content = content.decode('utf-8', 'replace')
        return content.splitlines()

    def compute_diff(name, fd_one, fd_two):
        content_one = fd_one.read()
        content_two = fd_two.read()

        if content_one == content_two:
            return None
        header = 'File %s differs' % name
        if name.endswith(('.xml', '.json', '.txt')):
            return [header] + list(difflib.ndiff(as_lines(content_one), as_lines(content_two)))
        return header

    def run(one, two):
        differences = []
        with zipfile.ZipFile(one) as one_zip, zipfile.ZipFile(two) as two_zip:
            one_nl = set(one_zip.namelist())
            two_nl = set(two_zip.namelist())
            for name in one_nl - two_nl:
                differences.append('File %s only in %s' % (name, one))
            for name in two_nl - one_nl:
                differences.append('File %s only in %s' % (name, two))
            for name in one_nl & two_nl:
                with one_zip.open(name) as fd_one:
                    with two_zip.open(name) as fd_two:
                        difference = compute_diff(name, fd_one, fd_two)
                        if difference:
                            differences.append(difference)

            if not differences:
                # Same members and same content: the order may still differ.
                one_zip_files = [zi.filename for zi in one_zip.infolist()]
                two_zip_files = [zi.filename for zi in two_zip.infolist()]
                if one_zip_files != two_zip_files:
                    differences.append('Files are not in the same order')
        return differences

    def opened(source):
        # File objects are used as is; anything else is treated as a path.
        if hasattr(source, 'read'):
            return source
        return open(source, 'rb')

    with opened(one) as one_fd:
        with opened(two) as two_fd:
            return run(one_fd, two_fd)