# passerelle/tests/test_pdf.py (Python, 130 lines, 5.2 KiB)

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2023 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import base64
import os
import subprocess
from unittest import mock
import pytest
from pdfrw import PdfReader
from passerelle.apps.pdf.models import Resource
from tests.utils import generic_endpoint_url, setup_access_rights
# Base64 text of tests/data/minimal.pdf, reused as file content in the
# request payloads of the tests below.
_minimal_pdf_path = os.path.join(os.path.dirname(__file__), 'data', 'minimal.pdf')
with open(_minimal_pdf_path, 'rb') as fd:
    pdf_content = base64.b64encode(fd.read()).decode()
@pytest.fixture
def pdf(db):
    """Create a ``Resource`` with slug ``test`` and pass it through ``setup_access_rights``.

    The value returned by ``setup_access_rights`` is what the tests receive
    (they read its ``slug`` attribute to build endpoint URLs).
    """
    # NOTE(review): ``db`` is presumably the pytest-django database fixture;
    # it is requested only for its side effect and is not used directly.
    resource = Resource.objects.create(slug='test')
    return setup_access_rights(resource)
@mock.patch('subprocess.check_output')
def test_pdf_assemble(mocked_check_output, app, pdf):
    """Exercise the ``assemble`` endpoint with ``subprocess.check_output`` mocked.

    Scenarios, in order: a single-file request, a request mixing empty and
    filled file entries, faked pdftk failures (timeout and non-zero exit),
    and malformed requests.
    """
    url = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)

    # --- single file: check the response headers and the whole pdftk command
    single_file = {'filename': 'foo.pdf', 'files/0': {'content': pdf_content}}
    response = app.post_json(url, params=single_file, status=200)
    assert response.headers['content-type'] == 'application/pdf'
    assert response.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
    assert mocked_check_output.call_count == 1
    # call_args unpacks as (positional arguments, keyword arguments)
    positional, keywords = mocked_check_output.call_args
    command = positional[0]
    assert len(command) == 5
    assert command[0] == '/usr/bin/pdftk'
    assert command[1].endswith('/pdf-0.pdf')
    assert command[2] == 'cat'
    assert command[3] == 'output'
    assert command[4] == '-'
    assert keywords['timeout'] == 20

    # --- mixed entries: files/0, files/2 and files/4 carry no content, so the
    # test expects only files/1 and files/3 on the pdftk command line
    mixed_files = {
        'filename': 'bar.pdf',
        'files/0': {'content': ''},
        'files/1': {'content': pdf_content},
        'files/2': None,
        'files/3': pdf_content,
        'files/4': '',
    }
    mocked_check_output.reset_mock()
    response = app.post_json(url, params=mixed_files, status=200)
    assert response.headers['content-type'] == 'application/pdf'
    assert response.headers['content-disposition'] == 'attachment; filename="bar.pdf"'
    assert mocked_check_output.call_count == 1
    positional, keywords = mocked_check_output.call_args
    command = positional[0]
    assert len(command) == 6
    assert command[0] == '/usr/bin/pdftk'
    assert command[1].endswith('/pdf-1.pdf')  # from files/1
    assert command[2].endswith('/pdf-3.pdf')  # from files/3

    # --- pdftk errors (faked): still answered with HTTP 200, error in the JSON
    failing_payload = {'filename': 'out.pdf', 'files/0': {'content': pdf_content}}

    def err_desc_when_pdftk_raises(failure):
        """Post ``failing_payload`` while the mock raises ``failure``; return ``err_desc``."""
        mocked_check_output.reset_mock()
        mocked_check_output.side_effect = failure
        failed = app.post_json(url, params=failing_payload, status=200)
        assert mocked_check_output.call_count == 1
        assert failed.json['err'] == 1
        return failed.json['err_desc']

    description = err_desc_when_pdftk_raises(subprocess.TimeoutExpired(cmd=[], timeout=20))
    assert description.startswith('pdftk timed out after 20 seconds')

    description = err_desc_when_pdftk_raises(
        subprocess.CalledProcessError(cmd=[], returncode=42, output='ooops')
    )
    assert description.startswith('pdftk returned non-zero exit status 42')
    assert 'ooops' in description

    # --- bad calls: a body that is not JSON
    response = app.post(url, status=400)
    assert response.headers['content-type'].startswith('application/json')
    assert response.json['err'] == 1
    assert response.json['err_desc'].startswith('could not decode body to json')

    # --- bad calls: JSON payloads rejected with an exact error message
    rejected_payloads = [
        ({}, "'filename' is a required property"),
        ({'filename': 'out.pdf'}, "'files' is a required property"),
        ({'filename': 'out.pdf', 'files/0': 42}, "42 is not of type 'object'"),
    ]
    for bad_payload, expected_message in rejected_payloads:
        response = app.post_json(url, params=bad_payload, status=400)
        assert response.json['err'] == 1
        assert response.json['err_desc'] == expected_message

    # --- bad calls: GET is answered with 405
    app.get(url, status=405)
def test_pdf_real_pdftk_call(app, pdf, settings):
    """Call the ``assemble`` endpoint without mocking, using the real pdftk.

    The test is skipped when the path in ``settings.PDFTK_PATH`` does not
    exist. Two copies of the shared PDF content are posted and the response
    is expected to be a PDF with two pages.
    """
    pdftk_path = settings.PDFTK_PATH
    if not os.path.exists(pdftk_path):
        pytest.skip('pdftk (%s) not found' % pdftk_path)

    url = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)
    two_copies = {
        'filename': 'twopages.pdf',
        'files/0': {'content': pdf_content},
        'files/1': {'content': pdf_content},
    }
    response = app.post_json(url, params=two_copies, status=200)

    headers = response.headers
    assert headers['content-type'] == 'application/pdf'
    assert headers['content-disposition'] == 'attachment; filename="twopages.pdf"'

    # the body must start with the PDF signature and parse as two pages
    body = response.content
    assert body.startswith(b'%PDF-')
    assert PdfReader(fdata=body).numPages == 2