pdf: add fill-form endpoint with server side mappings (#74797)
gitea/passerelle/pipeline/head There was a failure building this commit Details

This commit is contained in:
Benjamin Dauvergne 2023-02-27 19:53:27 +01:00
parent c8e1a3f50b
commit 06b13b20db
11 changed files with 470 additions and 251 deletions

View File

@ -0,0 +1,63 @@
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2020 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django import forms
from django.utils.translation import gettext_lazy as _
from passerelle.utils.forms import ConditionField, TemplateField
from passerelle.utils.pdf import PDF
from . import models
class FieldsMappingEditForm(forms.ModelForm):
    """Form editing the ``fields_mapping`` dictionary of a PDF resource.

    One form field named ``field_<digest_id>`` is created for every checkbox
    or text field found in the resource's PDF file; other widget types are
    ignored.
    """

    class Meta:
        model = models.Resource
        fields = ()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        resource = self.instance
        if not resource.fill_form_file:
            # no PDF file on the resource: the form stays empty
            return

        stored_mapping = resource.fields_mapping or {}
        # PDF widget type -> (hint inserted in the label, form field class)
        supported = {
            'checkbox': (_('boolean expression'), ConditionField),
            'text': (_('text template'), TemplateField),
        }
        with resource.fill_form_file as fd:
            pdf = PDF(fd)
            for page in pdf.pages:
                for index, pdf_field in enumerate(page.fields):
                    name = f'field_{pdf_field.digest_id}'
                    handler = supported.get(pdf_field.widget_type)
                    if handler is None:
                        continue
                    help_text, field_class = handler
                    # numbering follows the position in page.fields, skipped
                    # fields included
                    label = _('field {number} ({help_text})').format(
                        number=index + 1, help_text=help_text
                    )
                    form_field = field_class(
                        label=label,
                        required=False,
                        initial=stored_mapping.get(name, ''),
                    )
                    # read by the template to group the widgets by page
                    form_field.page_number = page.page_number
                    form_field.widget.attrs['tabindex'] = '0'
                    # NOTE(review): '0' is an unusual CSS class value - confirm it is intended
                    form_field.widget.attrs['class'] = '0'
                    self.fields[name] = form_field

    def save(self, commit=True):
        """Store the non-empty field values as the resource's fields mapping."""
        submitted = ((name, self.cleaned_data.get(name)) for name in self.fields)
        self.instance.fields_mapping = {name: value for name, value in submitted if value}
        return super().save(commit=commit)

View File

@ -18,11 +18,11 @@ class Migration(migrations.Migration):
name='fill_form_file',
field=models.FileField(
blank=True,
help_text='PDF file, used if not input-form in fill-form payload',
help_text='PDF file',
null=True,
upload_to=passerelle.utils.models.resource_file_upload_to,
validators=[passerelle.apps.pdf.models.validate_pdf],
verbose_name='Fill Form default input file',
verbose_name='Fill Form input file',
),
),
]

View File

@ -0,0 +1,19 @@
# Generated by Django 3.2.18 on 2023-03-01 16:48
from django.contrib.postgres.fields.jsonb import JSONField
from django.db import migrations
class Migration(migrations.Migration):
    """Add the optional ``fields_mapping`` JSON field to the ``resource`` model."""

    dependencies = [('pdf', '0002_resource_fill_form_file')]

    operations = [
        migrations.AddField(
            model_name='resource',
            name='fields_mapping',
            field=JSONField(blank=True, null=True, verbose_name='Field mapping'),
        ),
    ]

View File

@ -18,10 +18,10 @@ import base64
import os
import subprocess
import tempfile
import xml.etree.ElementTree as ET
from collections import OrderedDict
from django.conf import settings
from django.contrib.postgres.fields.jsonb import JSONField
from django.core.exceptions import ValidationError
from django.db import models
from django.http.response import HttpResponse
@ -31,6 +31,8 @@ from passerelle.base.models import BaseResource
from passerelle.utils.api import endpoint
from passerelle.utils.jsonresponse import APIError
from passerelle.utils.models import resource_file_upload_to
from passerelle.utils.pdf import PDF
from passerelle.utils.templates import evaluate_condition, evaluate_template
PDF_FILE_OBJECT = {
'type': 'object',
@ -80,50 +82,36 @@ ASSEMBLE_SCHEMA = {
),
}
FILL_FORM_SCHEMA = {
'$schema': 'http://json-schema.org/draft-04/schema#',
'title': '',
'description': '',
'type': 'object',
'required': ['filename', 'fields'],
'unflatten': True,
'properties': OrderedDict(
{
'filename': {
'description': _('output PDF filename'),
'type': 'string',
},
'input-form': PDF_FILE_OBJECT,
'fields': {
'description': _('hierarchical dictionary of fields'),
'type': 'object',
},
}
),
}
def validate_pdf(fieldfile):
fieldfile.open()
if fieldfile.read(5) != b'%PDF-':
raise ValidationError(
_('%(value)s is not a PDF file'),
params={'value': fieldfile},
)
to_close = fieldfile.closed
try:
if fieldfile.read(5) != b'%PDF-':
raise ValidationError(
_('%(value)s is not a PDF file'),
params={'value': fieldfile},
)
finally:
if to_close:
fieldfile.close()
class Resource(BaseResource):
category = _('Misc')
fill_form_file = models.FileField(
_('Fill Form default input file'),
_('Fill Form input file'),
upload_to=resource_file_upload_to,
help_text=_('PDF file, used if not input-form in fill-form payload'),
help_text=_('PDF file'),
validators=[validate_pdf],
null=True,
blank=True,
)
fields_mapping = JSONField(verbose_name=_('Field mapping'), null=True, blank=True)
hide_description_fields = ['fields_mapping']
class Meta:
verbose_name = _('PDF')
@ -182,77 +170,86 @@ class Resource(BaseResource):
response['Content-Disposition'] = 'attachment; filename="%s"' % filename
return response
FILL_FORM_SCHEMA = {
'$schema': 'http://json-schema.org/draft-04/schema#',
'title': '',
'description': _('content of the form to map on PDF fields'),
'unflatten': True,
'type': 'object',
'properties': OrderedDict(
{
'extra': {
'type': 'object',
'properties': OrderedDict(
{
'filename': {
'type': 'string',
'description': _('file name'),
},
'flatten': {
'description': _('remove PDF fields, keep only the drawed values'),
'type': 'boolean',
},
}
),
}
}
),
}
@endpoint(
name='fill-form',
description=_('Fills the input PDF form with fields'),
description=_('Fills the input PDF form with fields applying mappings to the received payload'),
perm='can_access',
methods=['post'],
parameters={
'filename': {'description': _('file name')},
'flatten': {'description': _('remove PDF fields, keep only the drawed values')},
},
post={
'request_body': {'schema': {'application/json': FILL_FORM_SCHEMA}},
'input_example': {
'filename': 'filled.pdf',
'fields/Page1[0]/FirstName[0]': 'John',
'fields/Page1[0]/LastName[0]': 'Doe',
'fields/Page2[0]/Checkbox[0]': '0',
'fields/Page2[0]/Checkbox[1]': '1',
'extra': {
'filename': 'filled.pdf',
'flatten': True,
},
'prenom': 'Jean',
'nom': 'Dupont',
},
},
)
def fill_form(self, request, post_data):
filename = post_data.pop('filename')
fields = post_data.pop('fields')
def fill_form(self, request, post_data, flatten=None, filename=None):
extra = post_data.pop('extra', {})
filename = filename or extra.get('filename') or post_data.get('filename') or 'form.pdf'
flatten_pdf = str(flatten or extra.get('flatten') or post_data.get('flatten')).lower() in (
'1',
'on',
'yes',
'true',
)
xfdf_root = ET.Element('xfdf')
xfdf_root.attrib['xmlns'] = 'http://ns.adobe.com/xfdf/'
xfdf_root.attrib['xml:space'] = 'preserve'
xfdf_f = ET.SubElement(xfdf_root, 'f')
xfdf_fields = ET.SubElement(xfdf_root, 'fields')
def add_fields(element, fields):
if isinstance(fields, dict):
for key in fields:
field = ET.SubElement(element, 'field')
field.attrib['name'] = key
add_fields(field, fields[key])
else:
value = ET.SubElement(element, 'value')
value.text = str(fields)
add_fields(xfdf_fields, fields)
with tempfile.TemporaryDirectory(prefix='passerelle-pdftk-%s-fill-form-' % self.id) as tmpdir:
if isinstance(post_data.get('input-form'), dict) and post_data['input-form'].get('content'):
input_filename = os.path.join(tmpdir, 'input-form.pdf')
with open(input_filename, mode='wb') as fd:
fd.write(base64.b64decode(post_data['input-form']['content']))
elif self.fill_form_file:
input_filename = self.fill_form_file.path
else:
raise APIError("missing or bad 'input-form' property", http_status=400)
# create xfdf
xfdf_filename = os.path.join(tmpdir, 'fields.xfdf')
xfdf_f.attrib['href'] = input_filename
with open(xfdf_filename, mode='wb') as fd:
ET.indent(xfdf_root)
ET.ElementTree(xfdf_root).write(fd, encoding='UTF-8', xml_declaration=True)
# call pdftk fill_form
pdf_content = self.run_pdftk(args=[input_filename, 'fill_form', xfdf_filename])
response = HttpResponse(pdf_content, content_type='application/pdf')
response['Content-Disposition'] = 'attachment; filename="%s"' % filename
return response
def pdftk_dump_data_fields_utf8(self):
if not self.fill_form_file:
return
try:
dump = self.run_pdftk(args=[self.fill_form_file.path, 'dump_data_fields_utf8']).decode()
except APIError as apierror:
return 'Error: %r' % apierror
unflatten_separated = ''
for line in dump.splitlines():
unflatten_separated += '<br>%s' % line
if line.startswith('FieldName: '):
unflatten_separated += ' → <b>fields/%s</b>' % line[11:].replace('.', '/')
return unflatten_separated
raise APIError('not PDF file configured')
fields_mapping = self.fields_mapping
if not fields_mapping:
raise APIError('no fields mapping configured')
with self.fill_form_file.open() as fd:
pdf = PDF(fd)
for page in pdf.pages:
for field in page.fields:
mapping_template = fields_mapping.get(f'field_{field.digest_id}')
if not mapping_template:
continue
if field.widget_type == 'checkbox':
value = evaluate_condition(mapping_template, post_data)
elif field.widget_type == 'text':
value = evaluate_template(mapping_template, post_data)
else:
raise NotImplementedError
if value is not None:
field.set(value)
response = HttpResponse(content_type='application/pdf')
response['Content-Disposition'] = 'attachment; filename="%s"' % filename
pdf.write(response, flatten=flatten_pdf)
return response

View File

@ -0,0 +1,74 @@
{% extends "passerelle/manage/resource_child_base.html" %}
{% load i18n gadjo %}
{% block breadcrumb %}
<a href="{% url 'manage-home' %}">{% trans 'Web Services' %}</a>
<a href="{{ object.get_absolute_url }}">PDF &mdash; {{ object.title }}</a>
<a href="#">{% trans "Edit fields mapping" %}</a>
{% endblock %}
{% block appbar %}
<h2>
{% trans "Edit fields mapping" %}
</h2>
{% endblock %}
{% block content %}
<form method="post" enctype="multipart/form-data" class="pdf-fields-mapping-edit-form">
{% csrf_token %}
<div class="buttons pdf-fields-mapping-edit-form--buttons">
<button class="submit-button">{% trans "Save" %}</button>
<a class="cancel" href="{{ object.get_absolute_url }}">{% trans 'Cancel' %}</a>
</div>
{% if form.errors %}
<div class="errornotice" tabindex="-1" autofocus>
<p>{% trans "There were errors processing your form." %}</p>
{% for error in form.non_field_errors %}
<p>{{ error }}</p>
{% endfor %}
{% for field in form %}
{% if field.is_hidden and field.errors %}
<p>
{% for error in field.errors %}
{% blocktrans with name=field.name %}(Hidden field {{name}}) {{ error }}{% endblocktrans %}
{% if not forloop.last %}<br>{% endif %}
{% endfor %}
</p>
{% endif %}
{% endfor %}
</div>
{% endif %}
{% for page_number, image_map in pages %}
<h3>{% blocktrans with number=page_number|add:1 %}Page {{number}}{% endblocktrans %}</h3>
<div class="pdf-fields-mapping-edit-form--page">
<div class="pdf-fields-mapping-edit-form--thumbnail">
<div>
<map name="map-page-{{ page_number }}">
{{ image_map|safe }}
</map>
<img src="{% url "pdf-page-thumbnail" connector="pdf" slug=object.slug page_number=page_number %}" usemap="#map-page-{{ page_number }}">
</div>
</div>
<div class="pdf-fields-mapping-edit-form--fields">
{% for field in form %}
{% if field.field.page_number == page_number %}
{% include "gadjo/widget.html" with field=field %}
{% endif %}
{% endfor %}
</div>
</div>
{% endfor %}
<div class="buttons pdf-fields-mapping-edit-form--buttons">
<button class="submit-button">{% trans "Save" %}</button>
<a class="cancel" href="{{ object.get_absolute_url }}">{% trans 'Cancel' %}</a>
</div>
</form>
<script>
// When an <area> of a page thumbnail is clicked, bring the widget it links to
// into view inside its scrollable fields column and give it the focus.
$(document).on('click', 'area', function (event) {
  var href = $(event.target).attr('href');
  var $widget = $(href);
  if (!$widget.length) {
    // the area does not point to an existing widget: nothing to do
    return;
  }
  // scrollTop is a property of DOM elements, not of jQuery objects, so work
  // on the raw element; there is one fields column per page, pick the one
  // holding the widget.
  var container = $widget.closest('.pdf-fields-mapping-edit-form--fields').get(0);
  if (container) {
    var delta = $widget.get(0).getBoundingClientRect().top - container.getBoundingClientRect().top;
    container.scrollTop += delta;
  }
  // the widget may hold a textarea instead of an input
  $widget.find('input, textarea').first().focus();
})
</script>
{% endblock %}

View File

@ -1,20 +1,8 @@
{% extends "passerelle/manage/service_view.html" %}
{% load i18n passerelle %}
{% block extra-tab-buttons %}
{% if user.is_staff and object.fill_form_file %}
<button role="tab" aria-selected="false" aria-controls="panel-dumpfields" id="tab-dumpfields"
tabindex="-1">{% trans "Fill Form default PDF Fields" %}</button>
{% endif %}
{% endblock %}
{% block extra-tab-panels %}
{% if user.is_staff and object.fill_form_file %}
<div id="panel-dumpfields" role="tabpanel" tabindex="-1" aria-labelledby="tab-dumpfields" hidden>
<div>
<p>{% blocktrans with file=object.fill_form_file %}PDFtk {{ file }} dump_data_fields_utf8 output{% endblocktrans %}</p>
<p>{{ object.pdftk_dump_data_fields_utf8|safe }}</p>
</div>
</div>
{% block actions %}
{% if object|can_edit:request.user %}
<a href="{% url 'pdf-fields-mapping-edit' connector='pdf' slug=object.slug %}">{% trans 'Fill form: Edit fields mapping' %}</a>
{% endif %}
{% endblock %}

View File

@ -0,0 +1,32 @@
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2019 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from django.urls import re_path
from . import views
# page where the fields mapping of a resource is edited
_fields_mapping_edit_url = re_path(
    r'^(?P<slug>[\w,-]+)/fields-mapping/edit/$',
    views.FieldsMappingEditView.as_view(),
    name='pdf-fields-mapping-edit',
)

# PNG thumbnail of one page of the resource's PDF file
_page_thumbnail_url = re_path(
    r'^(?P<slug>[\w,-]+)/page/(?P<page_number>[0-9]+)/$',
    views.PageThumbnailView.as_view(),
    name='pdf-page-thumbnail',
)

management_urlpatterns = [_fields_mapping_edit_url, _page_thumbnail_url]

View File

@ -0,0 +1,94 @@
# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2019 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import hashlib
import io
import PIL.Image
import PIL.ImageDraw
from django.http import Http404, HttpResponse, HttpResponseNotModified
from django.utils.translation import gettext_lazy as _
from django.views.generic import UpdateView
from passerelle.base.views import ResourceView
from passerelle.utils.pdf import PDF
from . import forms, models
class FieldsMappingEditView(ResourceView, UpdateView):
    """Management view editing the fields mapping of a PDF resource."""

    template_name = 'pdf/fields_mapping_edit.html'
    model = models.Resource
    form_class = forms.FieldsMappingEditForm

    def get_context_data(self, **kwargs):
        """Add ``pages`` to the template context.

        ``pages`` is a list of ``(page_number, image_map)`` pairs, one per
        page of the resource's PDF file. It is empty when the resource has no
        PDF file, mirroring the form which has no field in that case, so the
        page renders instead of failing on the missing file.
        """
        context_data = super().get_context_data(**kwargs)
        resource = self.get_object()
        pages = []
        if resource.fill_form_file:
            with resource.fill_form_file as fd:
                pdf = PDF(fd)
                pages = [
                    (page.page_number, page.fields_image_map(id_prefix='id_field_', id_suffix='_p'))
                    for page in pdf.pages
                ]
        context_data['pages'] = pages
        return context_data

    def get_success_url(self):
        """Return the parent's success URL with an empty fragment appended."""
        return super().get_success_url() + '#'
class PageThumbnailView(ResourceView):
    """Serve a PNG thumbnail of one page of the resource's PDF file."""

    model = models.Resource

    def make_thumbnail(self, page):
        """Return the PNG bytes of the annotated thumbnail of ``page``.

        A translucent red rectangle is drawn over each field rectangle, with
        a "field N" label next to it, N being the 1-based position of the
        field in ``page.thumbnail_field_rects()``.
        """
        thumbnail = page.thumbnail_png()
        image = PIL.Image.open(io.BytesIO(thumbnail))
        draw = PIL.ImageDraw.Draw(image, 'RGBA')
        for number, (field, area_rect) in enumerate(page.thumbnail_field_rects(), start=1):
            draw.rectangle(area_rect, fill=(255, 0, 0, 50))
            x = area_rect.x1
            y = (area_rect.y1 + area_rect.y2) / 2 - 5
            if field.widget_type == 'checkbox':
                # checkbox labels are drawn 10 pixels higher
                y -= 10
            draw.text((x, y), str(_('field %s') % number), anchor='lb', fill=(0, 0, 0, 255))
        output = io.BytesIO()
        image.save(output, 'PNG')
        return output.getvalue()

    @staticmethod
    def _etag_matches(etag, if_none_match):
        """Tell if the bare ``etag`` is listed in an ``If-None-Match`` header value.

        Entries are compared after removing surrounding whitespace, the
        optional weak marker ``W/`` and the double quotes, so both quoted and
        unquoted forms of the tag are recognized.
        """
        for candidate in if_none_match.split(','):
            candidate = candidate.strip()
            if candidate.startswith('W/'):
                candidate = candidate[2:]
            if candidate.strip('"') == etag:
                return True
        return False

    def get(self, request, page_number, **kwargs):
        """Return the thumbnail of page ``page_number``, or a 304 if cached.

        Raises ``Http404`` when the resource has no PDF file or when the page
        does not exist.
        """
        fill_form_file = self.get_object().fill_form_file
        if not fill_form_file:
            # no PDF file: there is no page to show
            raise Http404
        with fill_form_file as fd:
            pdf_content = fd.read()

        # the tag depends only on the PDF content; pages have distinct URLs
        etag = hashlib.md5(pdf_content).hexdigest()
        if self._etag_matches(etag, request.headers.get('If-None-Match', '')):
            # use browser cache
            response = HttpResponseNotModified()
        else:
            # produce the thumbnail
            pdf = PDF(pdf_content)
            try:
                page = pdf.page(int(page_number))
            except IndexError:
                raise Http404
            thumbnail_content = self.make_thumbnail(page)
            response = HttpResponse(thumbnail_content, content_type='image/png')
        # entity tags are quoted strings
        response['ETag'] = '"%s"' % etag
        response['Cache-Control'] = 'max-age=3600'
        return response

View File

@ -407,3 +407,31 @@ ul.get-params li {
min-height: 200px;
}
}
/* passerelle/apps/pdf/templates/pdf/fields_mapping_edit.html */
.pdf-fields-mapping-edit-form--page {
display: flex;
height: max-content;
}
.pdf-fields-mapping-edit-form--fields {
height: 1132px;
overflow-y: scroll;
> .widget textarea, > .widget input {
width: 100%;
}
.widget:target {
border: 2px dashed #FFAAAA;
}
}
.pdf-fields-mapping-edit-form--thumbnail {
position: sticky;
top: 0;
}
.pdf-fields-mapping-edit-form--thumbnail, .pdf-fields-mapping-edit-form--fields {
margin-right: 1em;
flex: 1;
}

View File

@ -157,6 +157,7 @@ class Page:
str(self.page_number + 1),
'-',
],
stderr=subprocess.DEVNULL,
input=self.pdf.content,
)
)
@ -238,6 +239,7 @@ class PDF:
f'-sOutputFile={output.name}',
'-',
],
stderr=subprocess.DEVNULL,
input=original_content,
)
except subprocess.CalledProcessError as e:

View File

@ -17,17 +17,17 @@
import base64
import os
import subprocess
import xml.etree.ElementTree as ET
from io import BytesIO
from unittest import mock
import pytest
from django.core.exceptions import ValidationError
from django.core.files import File
from django.urls import reverse
from django.core.files.base import ContentFile
from pdfrw import PdfReader
from passerelle.apps.pdf.models import Resource
from passerelle.utils.pdf import PDF
from tests.test_manager import login
from tests.utils import generic_endpoint_url, setup_access_rights
@ -139,145 +139,6 @@ def test_pdf_real_pdftk_assemble(app, pdf, settings):
assert PdfReader(fdata=resp.content).numPages == 2
@mock.patch('subprocess.check_output')
def test_pdf_fill_form(mocked_check_output, app, pdf):
endpoint = generic_endpoint_url('pdf', 'fill-form', slug=pdf.slug)
def check_xml(args, **kwargs):
# check XML FDF file
xfdf = ET.parse(args[3]).getroot()
assert xfdf.tag == '{http://ns.adobe.com/xfdf/}xfdf'
assert xfdf.find('{http://ns.adobe.com/xfdf/}f').attrib['href'].endswith('.pdf')
field = xfdf.find('{http://ns.adobe.com/xfdf/}fields').find('{http://ns.adobe.com/xfdf/}field')
assert field.attrib['name'] == 'fname'
assert field.find('{http://ns.adobe.com/xfdf/}value').text == 'John'
payload = {
'filename': 'foo.pdf',
'fields/fname': 'John',
'input-form': {'content': acroform_b64content},
}
mocked_check_output.side_effect = check_xml
resp = app.post_json(endpoint, params=payload, status=200)
assert resp.headers['content-type'] == 'application/pdf'
assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
assert mocked_check_output.call_count == 1
pdftk_call = mocked_check_output.call_args.args[0]
assert len(pdftk_call) == 6
assert pdftk_call[0] == '/usr/bin/pdftk'
assert pdftk_call[1].endswith('/input-form.pdf')
assert pdftk_call[2] == 'fill_form'
assert pdftk_call[3].endswith('/fields.xfdf')
assert pdftk_call[4] == 'output'
assert pdftk_call[5] == '-'
assert mocked_check_output.call_args.kwargs['timeout'] == 20
pdf.fill_form_file = File(BytesIO(acroform_content), 'default.pdf')
pdf.save()
payload = {
'filename': 'bar.pdf',
'fields/fname': 'John',
}
mocked_check_output.reset_mock()
resp = app.post_json(endpoint, params=payload, status=200)
assert resp.headers['content-type'] == 'application/pdf'
assert resp.headers['content-disposition'] == 'attachment; filename="bar.pdf"'
assert mocked_check_output.call_count == 1
pdftk_call = mocked_check_output.call_args.args[0]
assert len(pdftk_call) == 6
assert pdftk_call[0] == '/usr/bin/pdftk'
assert pdftk_call[1].endswith('media/pdf/test/default.pdf')
assert pdftk_call[2] == 'fill_form'
assert pdftk_call[3].endswith('/fields.xfdf')
assert pdftk_call[4] == 'output'
assert pdftk_call[5] == '-'
assert mocked_check_output.call_args.kwargs['timeout'] == 20
# pdftk errors (faked)
payload = {
'filename': 'foo.pdf',
'fields/fname': 'Bill',
'input-form': {'content': acroform_b64content},
}
mocked_check_output.reset_mock()
mocked_check_output.side_effect = subprocess.TimeoutExpired(cmd=[], timeout=20)
resp = app.post_json(endpoint, params=payload, status=200)
assert mocked_check_output.call_count == 1
assert resp.json['err'] == 1
assert resp.json['err_desc'].startswith('pdftk timed out after 20 seconds')
mocked_check_output.reset_mock()
mocked_check_output.side_effect = subprocess.CalledProcessError(cmd=[], returncode=42, output='ooops')
resp = app.post_json(endpoint, params=payload, status=200)
assert mocked_check_output.call_count == 1
assert resp.json['err'] == 1
assert resp.json['err_desc'].startswith('pdftk returned non-zero exit status 42')
assert 'ooops' in resp.json['err_desc']
# bad calls errors
resp = app.post(endpoint, status=400)
assert resp.headers['content-type'].startswith('application/json')
assert resp.json['err'] == 1
assert resp.json['err_desc'].startswith('could not decode body to json')
payload = {}
resp = app.post_json(endpoint, params=payload, status=400)
assert resp.json['err'] == 1
assert resp.json['err_desc'] == "'filename' is a required property"
payload = {'filename': 'out.pdf'}
resp = app.post_json(endpoint, params=payload, status=400)
assert resp.json['err'] == 1
assert resp.json['err_desc'] == "'fields' is a required property"
payload = {'filename': 'out.pdf', 'fields': 'not-a-dict'}
resp = app.post_json(endpoint, params=payload, status=400)
assert resp.json['err'] == 1
assert resp.json['err_desc'] == "fields: 'not-a-dict' is not of type 'object'"
pdf.fill_form_file = None # no default PDF form
pdf.save()
payload = {
'filename': 'bar.pdf',
'fields/fname': 'Alice',
}
resp = app.post_json(endpoint, params=payload, status=400)
assert resp.json['err'] == 1
assert resp.json['err_desc'] == "missing or bad 'input-form' property"
resp = app.get(endpoint, status=405)
def test_pdf_real_pdftk_fillform(admin_user, app, pdf, settings):
if not os.path.exists(settings.PDFTK_PATH):
pytest.skip('pdftk (%s) not found' % settings.PDFTK_PATH)
endpoint = generic_endpoint_url('pdf', 'fill-form', slug=pdf.slug)
payload = {
'filename': 'filled.pdf',
'fields/fname': 'ThisIsMyFirstName',
'input-form': {'content': acroform_b64content},
}
resp = app.post_json(endpoint, params=payload, status=200)
assert resp.headers['content-type'] == 'application/pdf'
assert resp.headers['content-disposition'] == 'attachment; filename="filled.pdf"'
assert PdfReader(fdata=resp.content).numPages == 1
assert resp.content[:5] == b'%PDF-'
# TODO: find an easy way to verify 'ThisIsMyFirstName' in resp.content
# dump fields in manager view
pdf.fill_form_file = File(BytesIO(acroform_content), 'pdf-form.pdf')
pdf.save()
manage_url = reverse('view-connector', kwargs={'connector': 'pdf', 'slug': pdf.slug})
resp = app.get(manage_url)
assert 'panel-dumpfields' not in resp.text
assert '<b>fields/fname</b>' not in resp.text
app = login(app)
resp = app.get(manage_url)
assert 'panel-dumpfields' in resp.text
assert '<b>fields/fname</b>' in resp.text
def test_pdf_validator(pdf):
pdf.fill_form_file = File(BytesIO(pdf_content), 'default.pdf')
pdf.save()
@ -291,3 +152,64 @@ def test_pdf_validator(pdf):
pdf.save()
with pytest.raises(ValidationError):
pdf.full_clean()
@pytest.fixture
def cerfa_content():
    """Return the bytes of the CERFA PDF form stored in the test data."""
    with open('tests/data/cerfa_10072-02.pdf', mode='rb') as pdf_file:
        content = pdf_file.read()
    return content
def test_fill_form_no_pdf(app, admin_user, pdf):
    """fill-form answers with an API error when the resource has no PDF file."""
    expected = {
        'data': None,
        'err': 1,
        'err_class': 'passerelle.utils.jsonresponse.APIError',
        'err_desc': 'not PDF file configured',
    }
    response = app.post_json('/pdf/test/fill-form/', params={'a': 1})
    assert response.json == expected
def test_fill_form_no_fields_mapping(app, admin_user, pdf, cerfa_content):
    """fill-form answers with an API error when no mapping has been saved."""
    pdf.fill_form_file.save('form.pdf', ContentFile(cerfa_content))
    expected = {
        'data': None,
        'err': 1,
        'err_class': 'passerelle.utils.jsonresponse.APIError',
        'err_desc': 'no fields mapping configured',
    }
    response = app.post_json('/pdf/test/fill-form/', params={'a': 1})
    assert response.json == expected
def test_fill_form_ok(app, admin_user, pdf, cerfa_content):
    """Configure a mapping through the manager, then check the filled PDF."""

    def first_checkbox_and_text(content):
        # first checkbox and first text field of the first page of a PDF
        first_page = PDF(content).page(0)
        checkbox = [field for field in first_page.fields if field.widget_type == 'checkbox'][0]
        text = [field for field in first_page.fields if field.widget_type == 'text'][0]
        return checkbox, text

    payload = {'testme': 'a', 'prenom': 'Jean', 'nom': 'Dupont'}

    pdf.fill_form_file.save('form.pdf', ContentFile(cerfa_content))
    app = login(app)

    # the mapping page links to a thumbnail image for the PDF pages
    resp = app.get('/pdf/test/')
    resp = resp.click('Fill form: Edit fields mapping')
    img_tags = resp.pyquery('img')
    image_resp = app.get(img_tags[0].attrib['src'])
    assert b'PNG' in image_resp.content

    # both fields are empty in the original document
    checkbox_field, text_field = first_checkbox_and_text(cerfa_content)
    assert checkbox_field.value is False
    assert text_field.value == ''

    # save a condition for the checkbox and a template for the text field
    resp.form.set(f'field_{checkbox_field.digest_id}', 'testme == "a"')
    resp.form.set(f'field_{text_field.digest_id}', '{{ prenom }} {{ nom }}')
    resp.form.submit().follow()

    # the mapping is applied to the payload
    resp = app.post_json('/pdf/test/fill-form/', params=payload)
    checkbox_field, text_field = first_checkbox_and_text(resp.content)
    assert checkbox_field.value is True
    assert text_field.value == 'Jean Dupont'

    # with flatten, no field is left on the page
    resp = app.post_json('/pdf/test/fill-form/?flatten=1', params=payload)
    flattened_page = PDF(resp.content).page(0)
    assert not flattened_page.fields