140 lines
4.9 KiB
Python
140 lines
4.9 KiB
Python
# passerelle - uniform access to multiple data sources and services
|
|
# Copyright (C) 2023 Entr'ouvert
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it
|
|
# under the terms of the GNU Affero General Public License as published
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import base64
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
from collections import OrderedDict
|
|
|
|
from django.conf import settings
|
|
from django.http.response import HttpResponse
|
|
from django.utils.translation import gettext_lazy as _
|
|
|
|
from passerelle.base.models import BaseResource
|
|
from passerelle.utils.api import endpoint
|
|
from passerelle.utils.jsonresponse import APIError
|
|
|
|
# JSON-schema fragment describing one PDF file given as an object.
# Only the base64 payload ('content') is mandatory; 'filename' and
# 'content_type' are optional string properties.
PDF_FILE_OBJECT = {
    'type': 'object',
    'description': _('PDF file'),
    'required': ['content'],
    'properties': {
        'filename': {'type': 'string', 'description': _('file name')},
        'content_type': {'type': 'string', 'description': _('MIME content-type')},
        'content': {'type': 'string', 'description': _('file content, base64 encoded')},
    },
}
|
|
|
|
# JSON schema (draft-04) of the payload accepted by the PDF assembly endpoint:
# an output file name plus the list of files to concatenate. Each entry of
# 'files' is either a PDF file object, a bare base64 string, or null.
# NOTE(review): 'unflatten' is presumably what allows flattened keys such as
# 'files/0' in the request body -- confirm against passerelle's endpoint code.
ASSEMBLE_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': '',
    'description': '',
    'type': 'object',
    'required': ['filename', 'files'],
    'unflatten': True,
    'properties': OrderedDict(
        [
            (
                'filename',
                {
                    'description': _('output PDF filename'),
                    'type': 'string',
                },
            ),
            (
                'files',
                {
                    'type': 'array',
                    'description': _('PDF files to catenate'),
                    'items': {
                        'oneOf': [
                            PDF_FILE_OBJECT,
                            {'type': 'string', 'description': _('PDF content, base64 encoded')},
                            {'type': 'null', 'description': _('empty file, do not consider')},
                        ]
                    },
                },
            ),
        ]
    ),
}
|
|
|
|
|
|
class Resource(BaseResource):
    """Passerelle resource exposing PDF operations implemented with the pdftk command-line tool."""

    category = _('Misc')

    class Meta:
        verbose_name = _('PDF')

    def run_pdftk(self, args):
        """Run pdftk and return the bytes it writes on its standard output.

        ``args`` is the list of pdftk arguments (input files and operation);
        the executable path (``settings.PDFTK_PATH``) is prepended and
        ``output -`` is appended so that the result is written to stdout.

        Raises APIError when pdftk exceeds ``settings.PDFTK_TIMEOUT`` seconds
        or exits with a non-zero status.
        """
        args = [settings.PDFTK_PATH] + args + ['output', '-']
        try:
            # stderr is captured on its own pipe: the resulting PDF is streamed
            # on stdout, so merging both streams would let any diagnostic
            # message emitted by pdftk end up inside the returned document.
            return subprocess.check_output(args, timeout=settings.PDFTK_TIMEOUT, stderr=subprocess.PIPE)
        except subprocess.TimeoutExpired as e:
            raise APIError('pdftk timed out after %s seconds' % e.timeout) from e
        except subprocess.CalledProcessError as e:
            # prefer the diagnostics from stderr; fall back on stdout when
            # stderr is empty or was not captured
            raise APIError(
                'pdftk returned non-zero exit status %s (%r)' % (e.returncode, e.stderr or e.output)
            ) from e

    @endpoint(
        description=_('Returns the assembly of received PDF files'),
        perm='can_access',
        methods=['post'],
        post={
            'request_body': {'schema': {'application/json': ASSEMBLE_SCHEMA}},
            'input_example': {
                'filename': 'output.pdf',
                'files/0': {
                    'filename': 'example-1.pdf',
                    'content_type': 'application/pdf',
                    'content': 'JVBERi0xL...(base64 PDF)...',
                },
                'files/1': {
                    'filename': 'example-2.pdf',
                    'content_type': 'application/pdf',
                    'content': '//4lUERGL...(base64 PDF)...',
                },
                'files/2': '//4lUERGL...(base64 PDF)',
            },
        },
    )
    def assemble(self, request, post_data):
        """Concatenate the received PDF files and return the result as an attachment.

        ``post_data`` holds the output ``filename`` and the ``files`` list.
        Each entry of ``files`` is either an object with a base64 ``content``
        or a bare base64 string; null, empty strings and objects without
        content are ignored. Files are concatenated in the order received.

        Returns an ``application/pdf`` HttpResponse whose Content-Disposition
        carries the requested file name.

        Raises APIError (HTTP 400) when an entry is not valid base64 or when
        no usable file is left, and APIError when pdftk fails.
        """
        filename = post_data.pop('filename')

        with tempfile.TemporaryDirectory(prefix='passerelle-pdftk-%s-assemble-' % self.id) as tmpdir:
            infiles = []
            for i, infile in enumerate(post_data['files']):
                if isinstance(infile, dict) and infile.get('content'):
                    b64content = infile['content']
                elif isinstance(infile, str) and infile:
                    b64content = infile
                else:
                    # empty entry: skipped, but it still consumes an index so
                    # that temporary file names match positions in 'files'
                    continue
                try:
                    content = base64.b64decode(b64content)
                except (TypeError, ValueError) as e:
                    # binascii.Error is a ValueError: malformed client input is
                    # reported as a bad request instead of an unhandled error
                    raise APIError(
                        "invalid base64 content in 'files/%d': %s" % (i, e), http_status=400
                    ) from e
                infile_filename = os.path.join(tmpdir, 'pdf-%d.pdf' % i)
                with open(infile_filename, mode='wb') as fd:
                    fd.write(content)
                infiles.append(infile_filename)
            if not infiles:
                raise APIError("no valid file found in 'files' property", http_status=400)
            # pdftk must run while the temporary directory still exists
            pdf_content = self.run_pdftk(args=infiles + ['cat'])

        response = HttpResponse(pdf_content, content_type='application/pdf')
        # the file name comes from the client: escape the characters that would
        # otherwise terminate or corrupt the quoted-string header parameter
        quoted_filename = filename.replace('\\', '\\\\').replace('"', '\\"')
        response['Content-Disposition'] = 'attachment; filename="%s"' % quoted_filename
        return response
|