misc: second guess libmagic for PDF files with garbage at start (#74702) #184

Merged
fpeters merged 1 commit from wip/74702-pdf-offset into main 2023-03-24 09:12:01 +01:00
2 changed files with 45 additions and 0 deletions

View File

@ -332,6 +332,44 @@ def test_form_file_field_submit_wrong_mimetype(pub):
assert resp.text == '%PDF-1.4 ...'
def test_form_file_field_submit_garbage_pdf(pub):
    """Check PDF-only file fields against uploads that contain leading garbage.

    A 500-byte upload with no PDF marker must be refused with the
    'invalid file type' error, while the same garbage followed by a
    '%PDF-1.4' header must be accepted, recorded and served back as
    'application/pdf'.
    """
    FormDef.wipe()

    # File field restricted to the PDF mimetype only.
    pdf_only = {
        'id': 1,
        'mimetypes': ['application/pdf'],
        'label': 'PDF files',
    }
    form_definition = FormDef()
    form_definition.name = 'test'
    form_definition.fields = [
        fields.FileField(id='0', label='file', document_type=pdf_only),
    ]
    form_definition.store()
    form_definition.data_class().wipe()

    garbage = b'x' * 500

    # First pass: nothing but garbage, the upload has to be rejected.
    rejected_upload = Upload('test.pdf', garbage, 'application/pdf')
    page = get_app(pub).get('/test/')
    page.forms[0]['f0$file'] = rejected_upload
    page = page.forms[0].submit('submit')
    assert page.pyquery('#form_error_f0').text() == 'invalid file type'

    # Second pass: garbage followed by a PDF header, the upload goes through.
    accepted_upload = Upload('test.pdf', garbage + b'%PDF-1.4 ...', 'application/pdf')
    page = get_app(pub).get('/test/')
    page.forms[0]['f0$file'] = accepted_upload
    page = page.forms[0].submit('submit')
    assert 'Check values then click submit.' in page.text
    page = page.forms[0].submit('submit').follow()
    assert 'The form has been recorded' in page.text

    # The stored file is reachable from the result page and served as a PDF.
    page = page.click('test.pdf')
    assert page.location.endswith('/test.pdf')
    page = page.follow()
    assert page.content_type == 'application/pdf'
    assert '%PDF-1.4' in page.text
def test_form_file_field_submit_blacklist(pub):
FormDef.wipe()
formdef = FormDef()

View File

@ -903,6 +903,13 @@ class FileWithPreviewWidget(CompositeWidget):
elif magic and self.value.fp:
mime = magic.Magic(mime=True)
filetype = mime.from_file(self.value.fp.name)
if filetype in ('application/octet-stream', 'text/plain'):
# second-guess libmagic as we want to accept PDF files
# with some garbage at start.
with open(self.value.fp.name, 'rb') as fd:
first_bytes = fd.read(1024)
if b'%PDF' in first_bytes:
filetype = 'application/pdf'
else:
filetype = getattr(self.value, 'storage_attrs', {}).get('content_type')
if not filetype: