287 lines
11 KiB
Python
287 lines
11 KiB
Python
# passerelle - uniform access to multiple data sources and services
|
|
# Copyright (C) 2023 Entr'ouvert
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it
|
|
# under the terms of the GNU Affero General Public License as published
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import base64
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import xml.etree.ElementTree as ET
|
|
from collections import OrderedDict
|
|
|
|
from django.conf import settings
|
|
from django.core.exceptions import ValidationError
|
|
from django.db import models
|
|
from django.http.response import HttpResponse
|
|
from django.template.base import VariableDoesNotExist
|
|
from django.utils.translation import gettext_lazy as _
|
|
|
|
from passerelle.base.models import BaseResource
|
|
from passerelle.utils.api import endpoint
|
|
from passerelle.utils.jsonresponse import APIError
|
|
from passerelle.utils.models import resource_file_upload_to
|
|
from passerelle.utils.templates import render_to_string, validate_template
|
|
|
|
# JSON schema fragment describing one PDF file carried in a payload: only the
# base64 encoded 'content' is mandatory, 'filename' and 'content_type' are
# optional strings.
PDF_FILE_OBJECT = {
    'type': 'object',
    'description': _('PDF file'),
    'required': ['content'],
    'properties': {
        'filename': {'type': 'string', 'description': _('file name')},
        'content_type': {'type': 'string', 'description': _('MIME content-type')},
        'content': {'type': 'string', 'description': _('file content, base64 encoded')},
    },
}
|
|
|
|
# JSON schema of the 'assemble' endpoint payload: an output filename and the
# list of PDF files to put together.  Each item of 'files' is either a PDF
# file object, a bare base64 string, or null.
# NOTE(review): 'unflatten' presumably lets flat keys such as 'files/0' be
# expanded into nested structures before validation - confirm in passerelle.
ASSEMBLE_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': '',
    'description': '',
    'type': 'object',
    'required': ['filename', 'files'],
    'unflatten': True,
    'properties': OrderedDict(
        [
            ('filename', {'description': _('output PDF filename'), 'type': 'string'}),
            (
                'files',
                {
                    'type': 'array',
                    'description': _('PDF files to catenate'),
                    'items': {
                        'oneOf': [
                            PDF_FILE_OBJECT,
                            {'type': 'string', 'description': _('PDF content, base64 encoded')},
                            {'type': 'null', 'description': _('empty file, do not consider')},
                        ]
                    },
                },
            ),
        ]
    ),
}
|
|
|
|
# JSON schema of the 'fill-form' endpoint payload: the output filename is
# mandatory; 'input-form' (the PDF form to fill) and 'xfdf' (nested dictionary
# of field values) are optional at the schema level.
# NOTE(review): 'unflatten' presumably lets flat keys such as
# 'xfdf/Page1[0]/FirstName[0]' be expanded into nested dictionaries - confirm.
FILL_FORM_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': '',
    'description': '',
    'type': 'object',
    'required': ['filename'],
    'unflatten': True,
    'properties': OrderedDict(
        [
            ('filename', {'description': _('output PDF filename'), 'type': 'string'}),
            ('input-form', PDF_FILE_OBJECT),
            ('xfdf', {'description': _('hierarchical dictionary of fields'), 'type': 'object'}),
        ]
    ),
}
|
|
|
|
|
|
def validate_pdf(fieldfile):
    """Field validator: accept *fieldfile* only if it starts with the PDF signature.

    The file is opened and its first five bytes are compared with ``%PDF-``.
    The file is neither rewound nor closed afterwards.

    Raises:
        ValidationError: when the file does not start with ``%PDF-``.
    """
    fieldfile.open()
    signature = fieldfile.read(5)
    if signature == b'%PDF-':
        return
    raise ValidationError(_('%(value)s is not a PDF file'), params={'value': fieldfile})
|
|
|
|
|
|
class Resource(BaseResource):
    """PDF connector: assembles PDF files and fills PDF forms by running pdftk."""

    category = _('Misc')

    # Default PDF form, used by the fill-form endpoint when the payload has no
    # usable 'input-form'.
    fill_form_file = models.FileField(
        _('Fill Form default input file'),
        upload_to=resource_file_upload_to,
        help_text=_('PDF file, used if not input-form in fill-form payload'),
        validators=[validate_pdf],
        null=True,
        blank=True,
    )
    # Django template rendered with the payload to build the XFDF file when
    # the payload has no 'xfdf' property.
    xfdf_template = models.FileField(
        _('XFDF Template'),
        upload_to=resource_file_upload_to,
        help_text=_('Django template, used to create a XFDF for fill-form, rendered with payload'),
        validators=[validate_template],
        null=True,
        blank=True,
    )

    class Meta:
        verbose_name = _('PDF')

    def run_pdftk(self, args):
        """Run pdftk with *args* and return what it writes on standard output.

        ``output -`` is appended to *args* so pdftk writes its result on stdout.

        Raises:
            APIError: on timeout (settings.PDFTK_TIMEOUT), on non-zero exit
                status, or when the pdftk binary cannot be executed.
        """
        args = [settings.PDFTK_PATH] + args + ['output', '-']
        try:
            # stderr is captured on its own pipe: merging it into stdout would
            # mix pdftk warnings with the bytes of the produced document.
            return subprocess.check_output(args, timeout=settings.PDFTK_TIMEOUT, stderr=subprocess.PIPE)
        except subprocess.TimeoutExpired as e:
            raise APIError('pdftk timed out after %s seconds' % e.timeout) from e
        except subprocess.CalledProcessError as e:
            raise APIError(
                'pdftk returned non-zero exit status %s (%r)' % (e.returncode, e.stderr or e.output)
            ) from e
        except OSError as e:
            # binary missing or not executable
            raise APIError('pdftk could not be executed: %s' % e) from e

    @staticmethod
    def _write_base64_file(path, b64content, what):
        """Decode *b64content* and write the resulting bytes to *path*.

        *what* names the payload element in the error message.

        Raises:
            APIError: (HTTP 400) when *b64content* is not valid base64.
        """
        try:
            content = base64.b64decode(b64content)
        except ValueError as e:  # binascii.Error is a subclass of ValueError
            raise APIError('invalid base64 content in %s: %s' % (what, e), http_status=400) from e
        with open(path, mode='wb') as fd:
            fd.write(content)

    @staticmethod
    def _pdf_response(pdf_content, filename):
        """Return *pdf_content* as an ``application/pdf`` attachment named *filename*."""
        # backslashes and double quotes must be escaped inside a quoted-string
        quoted = filename.replace('\\', '\\\\').replace('"', '\\"')
        response = HttpResponse(pdf_content, content_type='application/pdf')
        response['Content-Disposition'] = 'attachment; filename="%s"' % quoted
        return response

    @classmethod
    def _add_xfdf_fields(cls, element, fields):
        """Append *fields* under *element*: dictionaries become nested
        ``<field name=...>`` elements, any other value becomes a ``<value>``
        element holding ``str(value)``."""
        if isinstance(fields, dict):
            for key, value in fields.items():
                field = ET.SubElement(element, 'field')
                field.attrib['name'] = key
                cls._add_xfdf_fields(field, value)
        else:
            ET.SubElement(element, 'value').text = str(fields)

    @endpoint(
        description=_('Returns the assembly of received PDF files'),
        perm='can_access',
        methods=['post'],
        post={
            'request_body': {'schema': {'application/json': ASSEMBLE_SCHEMA}},
            'input_example': {
                'filename': 'output.pdf',
                'files/0': {
                    'filename': 'example-1.pdf',
                    'content_type': 'application/pdf',
                    'content': 'JVBERi0xL...(base64 PDF)...',
                },
                'files/1': {
                    'filename': 'example-2.pdf',
                    'content_type': 'application/pdf',
                    'content': '//4lUERGL...(base64 PDF)...',
                },
                'files/2': '//4lUERGL...(base64 PDF)',
            },
        },
    )
    def assemble(self, request, post_data):
        """Concatenate the PDF files of the payload with ``pdftk ... cat``.

        Items of ``post_data['files']`` that are neither a dictionary with a
        non-empty 'content' nor a non-empty string are skipped.

        Returns:
            HttpResponse: the assembled PDF, as an attachment named after
            ``post_data['filename']``.

        Raises:
            APIError: (HTTP 400) when no usable file is found or a content is
                not valid base64; also whatever run_pdftk raises.
        """
        filename = post_data.pop('filename')

        with tempfile.TemporaryDirectory(prefix='passerelle-pdftk-%s-assemble-' % self.id) as tmpdir:
            infiles = []
            for i, infile in enumerate(post_data['files']):
                if isinstance(infile, dict) and infile.get('content'):
                    b64content = infile['content']
                elif isinstance(infile, str) and infile:
                    b64content = infile
                else:
                    continue
                infile_filename = os.path.join(tmpdir, 'pdf-%d.pdf' % i)
                self._write_base64_file(infile_filename, b64content, "'files/%d'" % i)
                infiles.append(infile_filename)
            if not infiles:
                raise APIError("no valid file found in 'files' property", http_status=400)
            pdf_content = self.run_pdftk(args=infiles + ['cat'])

        return self._pdf_response(pdf_content, filename)

    @endpoint(
        name='fill-form',
        description=_('Fills the input PDF form with fields'),
        perm='can_access',
        methods=['post'],
        post={
            'request_body': {'schema': {'application/json': FILL_FORM_SCHEMA}},
            'input_example': {
                'filename': 'filled.pdf',
                'xfdf/Page1[0]/FirstName[0]': 'John',
                'xfdf/Page1[0]/LastName[0]': 'Doe',
                'xfdf/Page2[0]/Checkbox[0]': '0',
                'xfdf/Page2[0]/Checkbox[1]': '1',
            },
        },
    )
    def fill_form(self, request, post_data):
        """Fill a PDF form with ``pdftk <input> fill_form <xfdf>``.

        The field values come from the 'xfdf' property of the payload when
        present, otherwise from the XFDF template rendered with the payload.
        The form is the 'input-form' of the payload when it has a content,
        otherwise the default input file of the connector.

        Returns:
            HttpResponse: the filled PDF, as an attachment named after
            ``post_data['filename']``.

        Raises:
            APIError: (HTTP 400) when neither 'xfdf' nor a template is
                available, when no input form is available, when the form
                content is not valid base64 or when the template cannot be
                rendered; also whatever run_pdftk raises.
        """
        filename = post_data['filename']
        if 'xfdf' in post_data:
            fields = post_data.pop('xfdf')
        elif self.xfdf_template:
            fields = None
        else:
            raise APIError("missing 'xfdf' property (no XFDF template)", http_status=400)

        with tempfile.TemporaryDirectory(prefix='passerelle-pdftk-%s-fill-form-' % self.id) as tmpdir:
            input_form = post_data.get('input-form')
            if isinstance(input_form, dict) and input_form.get('content'):
                input_filename = os.path.join(tmpdir, 'input-form.pdf')
                self._write_base64_file(input_filename, input_form['content'], "'input-form' property")
            elif self.fill_form_file:
                input_filename = self.fill_form_file.path
            else:
                raise APIError(
                    "missing or bad 'input-form' property (no default input file)", http_status=400
                )

            # create xfdf
            xfdf_filename = os.path.join(tmpdir, 'fields.xfdf')
            if fields is not None:
                xfdf_root = ET.Element('xfdf')
                xfdf_root.attrib['xmlns'] = 'http://ns.adobe.com/xfdf/'
                xfdf_root.attrib['xml:space'] = 'preserve'
                xfdf_f = ET.SubElement(xfdf_root, 'f')
                xfdf_f.attrib['href'] = input_filename
                self._add_xfdf_fields(ET.SubElement(xfdf_root, 'fields'), fields)
                ET.indent(xfdf_root)
                with open(xfdf_filename, mode='wb') as fd:
                    ET.ElementTree(xfdf_root).write(fd, encoding='UTF-8', xml_declaration=True)
            else:
                self.xfdf_template.seek(0)
                xfdf_template = self.xfdf_template.read().decode()
                try:
                    xfdf_content = render_to_string(xfdf_template, post_data)
                except VariableDoesNotExist as exc:
                    raise APIError("cannot render XFDF template: %s" % exc, http_status=400)
                # the template was decoded as UTF-8: write it back the same
                # way instead of relying on the locale default encoding
                with open(xfdf_filename, mode='w', encoding='utf-8') as fd:
                    fd.write(xfdf_content)

            # call pdftk fill_form
            pdf_content = self.run_pdftk(args=[input_filename, 'fill_form', xfdf_filename])

        return self._pdf_response(pdf_content, filename)

    def pdftk_dump_data_fields_utf8(self):
        """Return an HTML rendering of ``pdftk dump_data_fields_utf8`` for the
        default input file.

        Every line of the dump is prefixed with ``<br>``; each ``FieldName:``
        line is followed by the matching ``xfdf/...`` key (dots replaced by
        slashes).  Returns None when there is no default input file and an
        ``Error: ...`` string when pdftk fails.

        NOTE(review): the result already contains markup, so it is presumably
        rendered as raw HTML by the caller; pdftk output is therefore escaped
        here - confirm against the template using it.
        """
        import html  # stdlib, imported locally so this block stands on its own

        if not self.fill_form_file:
            return None
        try:
            dump = self.run_pdftk(args=[self.fill_form_file.path, 'dump_data_fields_utf8']).decode()
        except APIError as apierror:
            return html.escape('Error: %r' % apierror)

        prefix = 'FieldName: '
        parts = []
        for line in dump.splitlines():
            parts.append('<br>%s' % html.escape(line))
            if line.startswith(prefix):
                xfdf_key = line[len(prefix):].replace('.', '/')
                parts.append(' → <b>xfdf/%s</b>' % html.escape(xfdf_key))
        return ''.join(parts)
|