# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2023  Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import base64
import os
import subprocess
from io import BytesIO
from unittest import mock

import pytest
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
from pdfrw import PdfReader

from passerelle.apps.pdf.models import Resource
from passerelle.utils.pdf import PDF
from tests.test_manager import login
from tests.utils import generic_endpoint_url, setup_access_rights

# Sample files live next to this module, so they are resolved from __file__
# rather than from the directory pytest happens to be started in.
DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')


def _read_data(filename):
    """Return the raw bytes of *filename* from the tests data directory."""
    with open(os.path.join(DATA_DIR, filename), 'rb') as fd:
        return fd.read()


def _first_field(page, widget_type):
    """Return the first field of *page* whose widget type is *widget_type*."""
    fields = [field for field in page.fields if field.widget_type == widget_type]
    assert fields, f'no {widget_type} field found on page'
    return fields[0]


pdf_content = _read_data('minimal.pdf')
pdf_b64content = base64.b64encode(pdf_content).decode()

acroform_content = _read_data('pdf-form.pdf')
acroform_b64content = base64.b64encode(acroform_content).decode()


@pytest.fixture
def pdf(db):
    """Create the 'test' PDF resource and pass it through setup_access_rights."""
    return setup_access_rights(Resource.objects.create(slug='test', title='test', description='test'))


@mock.patch('subprocess.check_output')
def test_pdf_assemble(mocked_check_output, app, pdf):
    """Check the pdftk command built by 'assemble', then its error reporting."""
    endpoint = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)

    # single input file
    payload = {'filename': 'foo.pdf', 'files/0': {'content': pdf_b64content}}
    resp = app.post_json(endpoint, params=payload, status=200)
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
    assert mocked_check_output.call_count == 1
    pdftk_call = mocked_check_output.call_args.args[0]
    assert len(pdftk_call) == 5
    assert pdftk_call[0] == '/usr/bin/pdftk'
    assert pdftk_call[1].endswith('/pdf-0.pdf')
    assert pdftk_call[2] == 'cat'
    assert pdftk_call[3] == 'output'
    assert pdftk_call[4] == '-'
    assert mocked_check_output.call_args.kwargs['timeout'] == 20

    # empty and null entries are not handed to pdftk; a bare base64 string
    # is accepted in place of a {'content': ...} object
    payload = {
        'filename': 'bar.pdf',
        'files/0': {'content': ''},
        'files/1': {'content': pdf_b64content},
        'files/2': None,
        'files/3': pdf_b64content,
        'files/4': '',
    }
    mocked_check_output.reset_mock()
    resp = app.post_json(endpoint, params=payload, status=200)
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="bar.pdf"'
    assert mocked_check_output.call_count == 1
    pdftk_call = mocked_check_output.call_args.args[0]
    assert len(pdftk_call) == 6
    assert pdftk_call[0] == '/usr/bin/pdftk'
    assert pdftk_call[1].endswith('/pdf-1.pdf')  # files/1 (files/0 is empty)
    assert pdftk_call[2].endswith('/pdf-3.pdf')  # files/3 (files/2 is null)

    # pdftk errors (faked)
    payload = {'filename': 'out.pdf', 'files/0': {'content': pdf_b64content}}
    mocked_check_output.reset_mock()
    mocked_check_output.side_effect = subprocess.TimeoutExpired(cmd=[], timeout=20)
    resp = app.post_json(endpoint, params=payload, status=200)
    assert mocked_check_output.call_count == 1
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('pdftk timed out after 20 seconds')

    mocked_check_output.reset_mock()
    mocked_check_output.side_effect = subprocess.CalledProcessError(cmd=[], returncode=42, output='ooops')
    resp = app.post_json(endpoint, params=payload, status=200)
    assert mocked_check_output.call_count == 1
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('pdftk returned non-zero exit status 42')
    assert 'ooops' in resp.json['err_desc']

    # bad calls errors
    resp = app.post(endpoint, status=400)
    assert resp.headers['content-type'].startswith('application/json')
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('could not decode body to json')

    payload = {}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'filename' is a required property"

    payload = {'filename': 'out.pdf'}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'files' is a required property"

    payload = {'filename': 'out.pdf', 'files/0': 42}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_class'] == 'passerelle.utils.json.JSONValidationError'

    # only POST is accepted
    app.get(endpoint, status=405)


def test_pdf_real_pdftk_assemble(app, pdf, settings):
    """Assemble two one-page PDFs with the real pdftk binary, when installed."""
    if not os.path.exists(settings.PDFTK_PATH):
        pytest.skip('pdftk (%s) not found' % settings.PDFTK_PATH)

    endpoint = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)
    payload = {
        'filename': 'twopages.pdf',
        'files/0': {'content': pdf_b64content},
        'files/1': {'content': pdf_b64content},
    }
    resp = app.post_json(endpoint, params=payload, status=200)
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="twopages.pdf"'
    assert resp.content[:5] == b'%PDF-'
    assert PdfReader(fdata=resp.content).numPages == 2


@mock.patch('subprocess.check_output')
def test_pdf_watermark(mocked_check_output, app, pdf):
    """Check the pdftk command built by 'watermark', then payload validation."""
    endpoint = generic_endpoint_url('pdf', 'watermark', slug=pdf.slug)

    # no 'mode' in the payload: pdftk is called with 'background'
    payload = {
        'filename': 'foo.pdf',
        'file': {'content': pdf_b64content},
        'stamp': {'content': pdf_b64content},
    }
    resp = app.post_json(endpoint, params=payload, status=200)
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
    assert mocked_check_output.call_count == 1
    pdftk_call = mocked_check_output.call_args.args[0]
    assert len(pdftk_call) == 6
    assert pdftk_call[0] == '/usr/bin/pdftk'
    assert pdftk_call[1].endswith('/pdf-file.pdf')
    assert pdftk_call[2] == 'background'
    assert pdftk_call[3].endswith('/pdf-stamp.pdf')
    assert pdftk_call[4] == 'output'
    assert pdftk_call[5] == '-'
    assert mocked_check_output.call_args.kwargs['timeout'] == 20

    # explicit mode (the mock is not reset, hence call_count == 2)
    payload['mode'] = 'multistamp'
    resp = app.post_json(endpoint, params=payload, status=200)
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
    assert mocked_check_output.call_count == 2
    pdftk_call = mocked_check_output.call_args.args[0]
    assert len(pdftk_call) == 6
    assert pdftk_call[0] == '/usr/bin/pdftk'
    assert pdftk_call[1].endswith('/pdf-file.pdf')
    assert pdftk_call[2] == 'multistamp'
    assert pdftk_call[3].endswith('/pdf-stamp.pdf')
    assert pdftk_call[4] == 'output'
    assert pdftk_call[5] == '-'
    assert mocked_check_output.call_args.kwargs['timeout'] == 20

    # bad calls errors
    resp = app.post(endpoint, status=400)
    assert resp.headers['content-type'].startswith('application/json')
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('could not decode body to json')

    payload = {}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'filename' is a required property"

    payload = {'filename': 'out.pdf'}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'file' is a required property"

    payload = {'filename': 'out.pdf', 'file': {'content': pdf_b64content}}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'stamp' is a required property"

    payload = {'filename': 'out.pdf', 'file': {'content': pdf_b64content}, 'stamp': 42}
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "stamp: 42 is not of type 'object'"

    payload = {
        'filename': 'out.pdf',
        'file': {'content': pdf_b64content},
        'stamp': {'content': pdf_b64content},
        'mode': 'foobar',
    }
    resp = app.post_json(endpoint, params=payload, status=400)
    assert resp.json['err'] == 1
    assert (
        resp.json['err_desc']
        == "mode: 'foobar' is not one of ['background', 'multibackground', 'stamp', 'multistamp']"
    )

    # only POST is accepted
    app.get(endpoint, status=405)


def test_pdf_real_pdftk_watermark(app, pdf, settings):
    """Watermark a one-page PDF with the real pdftk binary, when installed."""
    if not os.path.exists(settings.PDFTK_PATH):
        pytest.skip('pdftk (%s) not found' % settings.PDFTK_PATH)

    endpoint = generic_endpoint_url('pdf', 'watermark', slug=pdf.slug)
    payload = {
        'filename': 'watermark.pdf',
        'file': {'content': pdf_b64content},
        'stamp': {'content': pdf_b64content},
    }
    resp = app.post_json(endpoint, params=payload, status=200)
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="watermark.pdf"'
    assert resp.content[:5] == b'%PDF-'
    assert PdfReader(fdata=resp.content).numPages == 1


def test_pdf_validator(pdf):
    """full_clean() accepts both sample PDFs and rejects a non-PDF upload."""
    pdf.fill_form_file = File(BytesIO(pdf_content), 'default.pdf')
    pdf.save()
    pdf.full_clean()

    pdf.fill_form_file = File(BytesIO(acroform_content), 'default.pdf')
    pdf.save()
    pdf.full_clean()

    pdf.fill_form_file = File(BytesIO(b'not a pdf'), 'test.txt')
    pdf.save()
    with pytest.raises(ValidationError):
        pdf.full_clean()


@pytest.fixture
def cerfa_content():
    """Return the bytes of the sample CERFA form.

    The file is looked up next to this module, like the other sample PDFs,
    so the fixture does not depend on the current working directory.
    """
    return _read_data('cerfa_10072-02.pdf')


def test_fill_form_no_pdf(app, admin_user, pdf):
    """Without an uploaded PDF, no mapping page exists and fill-form errors out."""
    resp = app.get('/pdf/test')
    assert 'Fill form: Edit fields mapping' not in resp.text
    app.get('/pdf/test/fields-mapping/edit/', status=404)

    resp = app.post_json('/pdf/test/fill-form/', params={'a': 1})
    assert resp.json == {
        'data': None,
        'err': 1,
        'err_class': 'passerelle.utils.jsonresponse.APIError',
        'err_desc': 'not PDF file configured',
    }


def test_fill_form_no_fields_mapping(app, admin_user, pdf, cerfa_content):
    """With a PDF but no fields mapping, fill-form returns an API error."""
    pdf.fill_form_file.save('form.pdf', ContentFile(cerfa_content))

    resp = app.post_json('/pdf/test/fill-form/', params={'a': 1})
    assert resp.json == {
        'data': None,
        'err': 1,
        'err_class': 'passerelle.utils.jsonresponse.APIError',
        'err_desc': 'no fields mapping configured',
    }


def test_fill_form_ok(app, admin_user, pdf, cerfa_content):
    """Configure a mapping through the UI, then fill (and flatten) the form."""
    pdf.fill_form_file.save('form.pdf', ContentFile(cerfa_content))

    app = login(app)
    resp = app.get('/pdf/test/')
    resp = resp.click('Fill form: Edit fields mapping')

    # the mapping page embeds page thumbnails served as PNG
    img_tags = resp.pyquery('img')
    image_resp = app.get(img_tags[0].attrib['src'])
    assert b'PNG' in image_resp.content

    # both fields start out empty in the source document
    pdf_ = PDF(cerfa_content)
    page = pdf_.page(0)
    checkbox_field = _first_field(page, 'checkbox')
    text_field = _first_field(page, 'text')
    assert checkbox_field.value is False
    assert text_field.value == ''

    # map the checkbox to a condition and the text field to a template
    resp.form.set(f'field_{checkbox_field.digest_id}', 'testme == "a"')
    resp.form.set(f'field_{text_field.digest_id}', '{{ prenom }} {{ nom }}')
    resp.form.submit().follow()

    resp = app.post_json('/pdf/test/fill-form/', params={'testme': 'a', 'prenom': 'Jean', 'nom': 'Dupont'})
    pdf_ = PDF(resp.content)
    page = pdf_.page(0)
    checkbox_field = _first_field(page, 'checkbox')
    text_field = _first_field(page, 'text')
    assert checkbox_field.value is True
    assert text_field.value == 'Jean Dupont'

    # with flatten=1 the returned page has no form fields left
    resp = app.post_json(
        '/pdf/test/fill-form/?flatten=1', params={'testme': 'a', 'prenom': 'Jean', 'nom': 'Dupont'}
    )
    pdf_ = PDF(resp.content)
    page = pdf_.page(0)
    assert not page.fields


def test_add_or_edit(app, admin_user, pdf, cerfa_content):
    """The add and edit manager pages never contain 'fields_mapping'."""
    app = login(app)
    resp = app.get('/manage/pdf/add')
    assert b'fields_mapping' not in resp.content

    pdf.fill_form_file.save('form.pdf', ContentFile(cerfa_content))
    resp = app.get('/manage/pdf/test/edit')
    assert b'fields_mapping' not in resp.content


def test_thumbnail_url_contains_hash(app, admin_user, pdf, cerfa_content):
    """Thumbnail URLs carry a non-zero ?hash= that changes with the PDF file."""
    pdf.fill_form_file.save('form.pdf', ContentFile(cerfa_content))

    app = login(app)
    resp = app.get('/pdf/test/')
    resp = resp.click('Fill form: Edit fields mapping')
    img_tags = resp.pyquery('img')
    urls = [img_tag.attrib['src'] for img_tag in img_tags]
    assert len(urls) == 5  # cerfa_content has 5 pages
    assert all('?hash=' in url for url in urls)
    assert not any(url.endswith('?hash=0') for url in urls)
    first_url = urls[0]

    # replacing the file must yield different thumbnail URLs
    pdf.fill_form_file.save('form.pdf', ContentFile(acroform_content))
    resp = app.get('/pdf/test/')
    resp = resp.click('Fill form: Edit fields mapping')
    img_tags = resp.pyquery('img')
    urls = [img_tag.attrib['src'] for img_tag in img_tags]
    assert len(urls) == 1  # acroform_content has 1 page
    assert all('?hash=' in url for url in urls)
    assert not any(url.endswith('?hash=0') for url in urls)
    assert urls[0] != first_url