From 2ac7a1c99d55b16c0a146599c615e5fb588b02df Mon Sep 17 00:00:00 2001
From: Thomas NOEL
Date: Mon, 3 Apr 2023 15:25:01 +0200
Subject: [PATCH] pdf: add watermark endpoint (#74796)

---
Review notes (free text after the separator; not part of the commit
message and ignored when the patch is applied).

passerelle/apps/pdf/models.py
  * WATERMARK_SCHEMA is the JSON request body of the new endpoint.
    'filename', 'file' and 'stamp' are required; 'file' and 'stamp' both
    reuse PDF_FILE_OBJECT; 'mode' is optional and limited to background,
    multibackground, stamp or multistamp.
  * Resource.watermark is a POST-only endpoint guarded by perm
    'can_access'. It pops 'filename' from the payload, falls back to
    'background' when 'mode' is absent or empty, base64-decodes
    post_data['file']['content'] and post_data['stamp']['content'] into
    pdf-file.pdf and pdf-stamp.pdf inside a TemporaryDirectory, and calls
    self.run_pdftk(args=[file, mode, stamp]). The returned bytes are sent
    back as an application/pdf HttpResponse whose Content-Disposition
    header is an attachment named after 'filename'.
  * The nested create_file() helper assigns its own local 'filename'
    (the temporary path); the output filename popped at the top of
    watermark() is a different variable and is not modified by it.
  * NOTE(review): 'filename' is interpolated into the Content-Disposition
    header as-is; the hunk context shows the same construction right
    above the new method. Confirm this is acceptable for caller-supplied
    names.
  * NOTE(review): base64.b64decode() errors are not caught inside
    watermark(); presumably PDF_FILE_OBJECT validation rejects malformed
    content before the method runs - verify.

tests/test_pdf.py
  * test_pdf_watermark mocks subprocess.check_output and checks the
    command line: six items ('/usr/bin/pdftk', a path ending in
    /pdf-file.pdf, the mode, a path ending in /pdf-stamp.pdf, 'output',
    '-') with timeout=20, first with the default mode (background), then
    with 'multistamp'. It then checks the 400 responses for a non-JSON
    body, a missing 'filename' / 'file' / 'stamp', a non-object 'stamp'
    and an unknown 'mode', and the 405 response to GET.
  * test_pdf_real_pdftk_watermark is skipped when settings.PDFTK_PATH
    does not exist; otherwise it checks that the response starts with
    %PDF- and contains one page.

 passerelle/apps/pdf/models.py | 67 ++++++++++++++++++++++++
 tests/test_pdf.py | 97 +++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)

diff --git a/passerelle/apps/pdf/models.py b/passerelle/apps/pdf/models.py
index 03d3ce8b..27e48c80 100644
--- a/passerelle/apps/pdf/models.py
+++ b/passerelle/apps/pdf/models.py
@@ -82,6 +82,30 @@ ASSEMBLE_SCHEMA = {
     ),
 }
 
+WATERMARK_SCHEMA = {
+    '$schema': 'http://json-schema.org/draft-04/schema#',
+    'title': '',
+    'description': '',
+    'type': 'object',
+    'required': ['filename', 'file', 'stamp'],
+    'unflatten': True,
+    'properties': OrderedDict(
+        {
+            'filename': {
+                'description': _('output PDF filename'),
+                'type': 'string',
+            },
+            'file': PDF_FILE_OBJECT,
+            'stamp': PDF_FILE_OBJECT,
+            'mode': {
+                'description': _('watermark mode (default is background)'),
+                'type': 'string',
+                'enum': ['background', 'multibackground', 'stamp', 'multistamp'],
+            },
+        }
+    ),
+}
+
 
 def validate_pdf(fieldfile):
     to_close = fieldfile.closed
@@ -176,6 +200,49 @@ class Resource(BaseResource):
         response['Content-Disposition'] = 'attachment; filename="%s"' % filename
         return response
 
+    @endpoint(
+        description=_('Applies a PDF watermark (stamp) to a PDF file'),
+        perm='can_access',
+        methods=['post'],
+        display_order=0,
+        post={
+            'request_body': {'schema': {'application/json': WATERMARK_SCHEMA}},
+            'input_example': {
+                'filename': 'output.pdf',
+                'file': {
+                    'filename': 'example-1.pdf',
+                    'content_type': 'application/pdf',
+                    'content': 'JVBERi0xL...(base64 PDF)...',
+                },
+                'stamp': {
+                    'filename': 'example-2.pdf',
+                    'content_type': 'application/pdf',
+                    'content': '//4lUERGL...(base64 PDF)...',
+                },
+            },
+        },
+    )
+    def watermark(self, request, post_data):
+        filename = post_data.pop('filename')
+        mode = post_data.get('mode') or 'background'
+
+        with tempfile.TemporaryDirectory(prefix='passerelle-pdftk-%s-watermark-' % self.id) as tmpdir:
+
+            def create_file(key):
+                filename = os.path.join(tmpdir, 'pdf-%s.pdf' % key)
+                b64content = post_data[key]['content']
+                with open(filename, mode='wb') as fd:
+                    fd.write(base64.b64decode(b64content))
+                return filename
+
+            file = create_file('file')
+            stamp = create_file('stamp')
+            pdf_content = self.run_pdftk(args=[file, mode, stamp])
+
+        response = HttpResponse(pdf_content, content_type='application/pdf')
+        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
+        return response
+
     FILL_FORM_SCHEMA = {
         '$schema': 'http://json-schema.org/draft-04/schema#',
         'title': '',
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index 2d3eb976..4b3c1231 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -139,6 +139,103 @@ def test_pdf_real_pdftk_assemble(app, pdf, settings):
     assert PdfReader(fdata=resp.content).numPages == 2
 
 
+@mock.patch('subprocess.check_output')
+def test_pdf_watermark(mocked_check_output, app, pdf):
+    endpoint = generic_endpoint_url('pdf', 'watermark', slug=pdf.slug)
+
+    payload = {
+        'filename': 'foo.pdf',
+        'file': {'content': pdf_b64content},
+        'stamp': {'content': pdf_b64content},
+    }
+    resp = app.post_json(endpoint, params=payload, status=200)
+    assert resp.headers['content-type'] == 'application/pdf'
+    assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
+    assert mocked_check_output.call_count == 1
+    pdftk_call = mocked_check_output.call_args.args[0]
+    assert len(pdftk_call) == 6
+    assert pdftk_call[0] == '/usr/bin/pdftk'
+    assert pdftk_call[1].endswith('/pdf-file.pdf')
+    assert pdftk_call[2] == 'background'
+    assert pdftk_call[3].endswith('/pdf-stamp.pdf')
+    assert pdftk_call[4] == 'output'
+    assert pdftk_call[5] == '-'
+    assert mocked_check_output.call_args.kwargs['timeout'] == 20
+
+    payload['mode'] = 'multistamp'
+    resp = app.post_json(endpoint, params=payload, status=200)
+    assert resp.headers['content-type'] == 'application/pdf'
+    assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
+    assert mocked_check_output.call_count == 2
+    pdftk_call = mocked_check_output.call_args.args[0]
+    assert len(pdftk_call) == 6
+    assert pdftk_call[0] == '/usr/bin/pdftk'
+    assert pdftk_call[1].endswith('/pdf-file.pdf')
+    assert pdftk_call[2] == 'multistamp'
+    assert pdftk_call[3].endswith('/pdf-stamp.pdf')
+    assert pdftk_call[4] == 'output'
+    assert pdftk_call[5] == '-'
+    assert mocked_check_output.call_args.kwargs['timeout'] == 20
+
+    # bad calls errors
+    resp = app.post(endpoint, status=400)
+    assert resp.headers['content-type'].startswith('application/json')
+    assert resp.json['err'] == 1
+    assert resp.json['err_desc'].startswith('could not decode body to json')
+
+    payload = {}
+    resp = app.post_json(endpoint, params=payload, status=400)
+    assert resp.json['err'] == 1
+    assert resp.json['err_desc'] == "'filename' is a required property"
+
+    payload = {'filename': 'out.pdf'}
+    resp = app.post_json(endpoint, params=payload, status=400)
+    assert resp.json['err'] == 1
+    assert resp.json['err_desc'] == "'file' is a required property"
+
+    payload = {'filename': 'out.pdf', 'file': {'content': pdf_b64content}}
+    resp = app.post_json(endpoint, params=payload, status=400)
+    assert resp.json['err'] == 1
+    assert resp.json['err_desc'] == "'stamp' is a required property"
+
+    payload = {'filename': 'out.pdf', 'file': {'content': pdf_b64content}, 'stamp': 42}
+    resp = app.post_json(endpoint, params=payload, status=400)
+    assert resp.json['err'] == 1
+    assert resp.json['err_desc'] == "stamp: 42 is not of type 'object'"
+
+    payload = {
+        'filename': 'out.pdf',
+        'file': {'content': pdf_b64content},
+        'stamp': {'content': pdf_b64content},
+        'mode': 'foobar',
+    }
+    resp = app.post_json(endpoint, params=payload, status=400)
+    assert resp.json['err'] == 1
+    assert (
+        resp.json['err_desc']
+        == "mode: 'foobar' is not one of ['background', 'multibackground', 'stamp', 'multistamp']"
+    )
+
+    resp = app.get(endpoint, status=405)
+
+
+def test_pdf_real_pdftk_watermark(app, pdf, settings):
+    if not os.path.exists(settings.PDFTK_PATH):
+        pytest.skip('pdftk (%s) not found' % settings.PDFTK_PATH)
+
+    endpoint = generic_endpoint_url('pdf', 'watermark', slug=pdf.slug)
+    payload = {
+        'filename': 'watermark.pdf',
+        'file': {'content': pdf_b64content},
+        'stamp': {'content': pdf_b64content},
+    }
+    resp = app.post_json(endpoint, params=payload, status=200)
+    assert resp.headers['content-type'] == 'application/pdf'
+    assert resp.headers['content-disposition'] == 'attachment; filename="watermark.pdf"'
+    assert resp.content[:5] == b'%PDF-'
+    assert PdfReader(fdata=resp.content).numPages == 1
+
+
 def test_pdf_validator(pdf):
     pdf.fill_form_file = File(BytesIO(pdf_content), 'default.pdf')
     pdf.save()