# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2023 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import base64
import os
import subprocess
from unittest import mock

import pytest
from pdfrw import PdfReader

from passerelle.apps.pdf.models import Resource
from tests.utils import generic_endpoint_url, setup_access_rights

# Base64 text of the sample PDF shipped next to this module; used as the
# 'content' value of the files sent to the endpoint.
with open(os.path.join(os.path.dirname(__file__), 'data', 'minimal.pdf'), 'rb') as fd:
    pdf_content = base64.b64encode(fd.read()).decode()


@pytest.fixture
def pdf(db):
    """Create a PDF ``Resource`` with slug 'test' and pass it through ``setup_access_rights``."""
    resource = Resource.objects.create(slug='test')
    return setup_access_rights(resource)


@mock.patch('subprocess.check_output')
def test_pdf_assemble(mocked_check_output, app, pdf):
    """Exercise the 'assemble' endpoint with ``subprocess.check_output`` mocked.

    Covers, in order: a nominal single-file call, a call mixing usable and
    empty file entries, faked pdftk failures (timeout and non-zero exit),
    and malformed requests.
    """
    url = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)

    # nominal call with a single file
    response = app.post_json(
        url,
        params={'filename': 'foo.pdf', 'files/0': {'content': pdf_content}},
        status=200,
    )
    assert response.headers['content-type'] == 'application/pdf'
    assert response.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
    assert mocked_check_output.call_count == 1
    pdftk_args = mocked_check_output.call_args.args[0]
    assert len(pdftk_args) == 5
    executable, input_path, *operation = pdftk_args
    assert executable == '/usr/bin/pdftk'
    assert input_path.endswith('/pdf-0.pdf')
    assert operation == ['cat', 'output', '-']
    assert mocked_check_output.call_args.kwargs['timeout'] == 20

    # mixed entries: the assertions below expect only two input files
    # (pdf-1 and pdf-3) in the pdftk command line
    mocked_check_output.reset_mock()
    response = app.post_json(
        url,
        params={
            'filename': 'bar.pdf',
            'files/0': {'content': ''},
            'files/1': {'content': pdf_content},
            'files/2': None,
            'files/3': pdf_content,
            'files/4': '',
        },
        status=200,
    )
    assert response.headers['content-type'] == 'application/pdf'
    assert response.headers['content-disposition'] == 'attachment; filename="bar.pdf"'
    assert mocked_check_output.call_count == 1
    pdftk_args = mocked_check_output.call_args.args[0]
    assert len(pdftk_args) == 6
    executable, first_input, second_input = pdftk_args[:3]
    assert executable == '/usr/bin/pdftk'
    assert first_input.endswith('/pdf-1.pdf')  # matches payload key files/1
    assert second_input.endswith('/pdf-3.pdf')  # matches payload key files/3

    # pdftk errors (faked): reported as err=1 in a JSON body with HTTP 200
    single_file = {'filename': 'out.pdf', 'files/0': {'content': pdf_content}}

    mocked_check_output.reset_mock()
    mocked_check_output.side_effect = subprocess.TimeoutExpired(cmd=[], timeout=20)
    response = app.post_json(url, params=single_file, status=200)
    assert mocked_check_output.call_count == 1
    assert response.json['err'] == 1
    assert response.json['err_desc'].startswith('pdftk timed out after 20 seconds')

    mocked_check_output.reset_mock()
    mocked_check_output.side_effect = subprocess.CalledProcessError(cmd=[], returncode=42, output='ooops')
    response = app.post_json(url, params=single_file, status=200)
    assert mocked_check_output.call_count == 1
    assert response.json['err'] == 1
    description = response.json['err_desc']
    assert description.startswith('pdftk returned non-zero exit status 42')
    assert 'ooops' in description

    # bad calls errors
    response = app.post(url, status=400)
    assert response.headers['content-type'].startswith('application/json')
    assert response.json['err'] == 1
    assert response.json['err_desc'].startswith('could not decode body to json')

    response = app.post_json(url, params={}, status=400)
    assert response.json['err'] == 1
    assert response.json['err_desc'] == "'filename' is a required property"

    response = app.post_json(url, params={'filename': 'out.pdf'}, status=400)
    assert response.json['err'] == 1
    assert response.json['err_desc'] == "'files' is a required property"

    response = app.post_json(url, params={'filename': 'out.pdf', 'files/0': 42}, status=400)
    assert response.json['err'] == 1
    assert response.json['err_desc'] == "42 is not of type 'object'"

    # GET is expected to be rejected with 405
    app.get(url, status=405)


def test_pdf_real_pdftk_call(app, pdf, settings):
    """Assemble two copies of the sample PDF with the real pdftk binary.

    Skipped when the path in ``settings.PDFTK_PATH`` does not exist.
    """
    pdftk_path = settings.PDFTK_PATH
    if not os.path.exists(pdftk_path):
        pytest.skip('pdftk (%s) not found' % pdftk_path)

    url = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)
    response = app.post_json(
        url,
        params={
            'filename': 'twopages.pdf',
            'files/0': {'content': pdf_content},
            'files/1': {'content': pdf_content},
        },
        status=200,
    )
    assert response.headers['content-type'] == 'application/pdf'
    assert response.headers['content-disposition'] == 'attachment; filename="twopages.pdf"'
    assembled = response.content
    assert assembled[:5] == b'%PDF-'
    assert PdfReader(fdata=assembled).numPages == 2