pdf: add watermark endpoint (#74796)
gitea/passerelle/pipeline/head This commit looks good Details

This commit is contained in:
Thomas NOËL 2023-04-03 15:25:01 +02:00 committed by Thomas NOËL
parent d0c271ac41
commit 2ac7a1c99d
2 changed files with 164 additions and 0 deletions

View File

@ -82,6 +82,30 @@ ASSEMBLE_SCHEMA = {
),
}
# JSON schema (draft-04) describing the JSON body accepted by the watermark
# endpoint.  'filename', 'file' and 'stamp' are required; 'mode' is optional
# and, when given, must be one of the four values listed in its enum.
WATERMARK_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': '',
    'description': '',
    'type': 'object',
    'required': ['filename', 'file', 'stamp'],
    'unflatten': True,
    'properties': OrderedDict(
        [
            (
                'filename',
                {
                    'description': _('output PDF filename'),
                    'type': 'string',
                },
            ),
            # the document to modify and the document applied onto it share
            # the same object description
            ('file', PDF_FILE_OBJECT),
            ('stamp', PDF_FILE_OBJECT),
            (
                'mode',
                {
                    'description': _('watermark mode (default is background)'),
                    'type': 'string',
                    'enum': ['background', 'multibackground', 'stamp', 'multistamp'],
                },
            ),
        ]
    ),
}
def validate_pdf(fieldfile):
to_close = fieldfile.closed
@ -176,6 +200,49 @@ class Resource(BaseResource):
response['Content-Disposition'] = 'attachment; filename="%s"' % filename
return response
@endpoint(
    description=_('Applies a PDF watermark (stamp) to a PDF file'),
    perm='can_access',
    methods=['post'],
    display_order=0,
    post={
        'request_body': {'schema': {'application/json': WATERMARK_SCHEMA}},
        'input_example': {
            'filename': 'output.pdf',
            'file': {
                'filename': 'example-1.pdf',
                'content_type': 'application/pdf',
                'content': 'JVBERi0xL...(base64 PDF)...',
            },
            'stamp': {
                'filename': 'example-2.pdf',
                'content_type': 'application/pdf',
                'content': '//4lUERGL...(base64 PDF)...',
            },
        },
    },
)
def watermark(self, request, post_data):
    """Apply the posted 'stamp' PDF to the posted 'file' PDF.

    The base64 'content' of both documents is decoded into a temporary
    directory, then ``self.run_pdftk`` is called with the arguments
    ``[<file path>, <mode>, <stamp path>]``.  Its result is sent back as
    an ``application/pdf`` attachment named after the posted 'filename'.
    A missing or empty 'mode' falls back to 'background'.
    """
    output_name = post_data.pop('filename')
    operation = post_data.get('mode') or 'background'
    tmp_prefix = 'passerelle-pdftk-%s-watermark-' % self.id
    with tempfile.TemporaryDirectory(prefix=tmp_prefix) as workdir:
        # Write the two inputs to disk, 'file' first and 'stamp' second.
        input_paths = []
        for key in ('file', 'stamp'):
            path = os.path.join(workdir, 'pdf-%s.pdf' % key)
            encoded = post_data[key]['content']
            with open(path, mode='wb') as fd:
                fd.write(base64.b64decode(encoded))
            input_paths.append(path)
        source_path, stamp_path = input_paths
        # Must run before leaving the with block: the input files are
        # removed together with the temporary directory.
        pdf_content = self.run_pdftk(args=[source_path, operation, stamp_path])
    response = HttpResponse(pdf_content, content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="%s"' % output_name
    return response
FILL_FORM_SCHEMA = {
'$schema': 'http://json-schema.org/draft-04/schema#',
'title': '',

View File

@ -139,6 +139,103 @@ def test_pdf_real_pdftk_assemble(app, pdf, settings):
assert PdfReader(fdata=resp.content).numPages == 2
@mock.patch('subprocess.check_output')
def test_pdf_watermark(mocked_check_output, app, pdf):
    """Check the watermark endpoint with ``subprocess.check_output`` mocked.

    Covers the command line handed to pdftk for the default and an explicit
    mode, then the error responses for malformed requests.
    """
    endpoint = generic_endpoint_url('pdf', 'watermark', slug=pdf.slug)
    payload = {
        'filename': 'foo.pdf',
        'file': {'content': pdf_b64content},
        'stamp': {'content': pdf_b64content},
    }

    # First request posts no mode (pdftk must receive 'background'), the
    # second one explicitly asks for 'multistamp' on the same payload.
    scenarios = [(None, 'background'), ('multistamp', 'multistamp')]
    for call_number, (posted_mode, pdftk_mode) in enumerate(scenarios, start=1):
        if posted_mode is not None:
            payload['mode'] = posted_mode
        resp = app.post_json(endpoint, params=payload, status=200)
        assert resp.headers['content-type'] == 'application/pdf'
        assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
        assert mocked_check_output.call_count == call_number
        pdftk_call = mocked_check_output.call_args.args[0]
        assert len(pdftk_call) == 6
        command, file_path, mode_arg, stamp_path, output_keyword, output_target = pdftk_call
        assert command == '/usr/bin/pdftk'
        assert file_path.endswith('/pdf-file.pdf')
        assert mode_arg == pdftk_mode
        assert stamp_path.endswith('/pdf-stamp.pdf')
        assert output_keyword == 'output'
        assert output_target == '-'
        assert mocked_check_output.call_args.kwargs['timeout'] == 20

    # bad calls errors
    resp = app.post(endpoint, status=400)
    assert resp.headers['content-type'].startswith('application/json')
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('could not decode body to json')

    # Each invalid JSON payload is paired with the exact error description
    # expected in the response.
    pdf_part = {'content': pdf_b64content}
    invalid_payloads = [
        ({}, "'filename' is a required property"),
        ({'filename': 'out.pdf'}, "'file' is a required property"),
        ({'filename': 'out.pdf', 'file': pdf_part}, "'stamp' is a required property"),
        (
            {'filename': 'out.pdf', 'file': pdf_part, 'stamp': 42},
            "stamp: 42 is not of type 'object'",
        ),
        (
            {'filename': 'out.pdf', 'file': pdf_part, 'stamp': pdf_part, 'mode': 'foobar'},
            "mode: 'foobar' is not one of ['background', 'multibackground', 'stamp', 'multistamp']",
        ),
    ]
    for bad_payload, expected_error in invalid_payloads:
        resp = app.post_json(endpoint, params=bad_payload, status=400)
        assert resp.json['err'] == 1
        assert resp.json['err_desc'] == expected_error

    # the endpoint only accepts POST
    app.get(endpoint, status=405)
def test_pdf_real_pdftk_watermark(app, pdf, settings):
    """Run the watermark endpoint against the real pdftk binary.

    Skipped when no file exists at ``settings.PDFTK_PATH``.
    """
    pdftk_path = settings.PDFTK_PATH
    if not os.path.exists(pdftk_path):
        pytest.skip('pdftk (%s) not found' % pdftk_path)

    # the same one-page document is used both as input and as stamp
    pdf_part = {'content': pdf_b64content}
    resp = app.post_json(
        generic_endpoint_url('pdf', 'watermark', slug=pdf.slug),
        params={'filename': 'watermark.pdf', 'file': pdf_part, 'stamp': pdf_part},
        status=200,
    )

    headers = resp.headers
    assert headers['content-type'] == 'application/pdf'
    assert headers['content-disposition'] == 'attachment; filename="watermark.pdf"'
    body = resp.content
    assert body[:5] == b'%PDF-'
    assert PdfReader(fdata=body).numPages == 1
def test_pdf_validator(pdf):
pdf.fill_form_file = File(BytesIO(pdf_content), 'default.pdf')
pdf.save()