cartads_cs: check CERFA uploads are PDF with embedded forms (#35794)

This commit is contained in:
Frédéric Péters 2019-09-03 18:19:22 +02:00
parent 9546d41f7c
commit 652bc03715
5 changed files with 140 additions and 5 deletions

3
debian/control vendored
View File

@ -35,7 +35,8 @@ Depends: ${python:Depends},
python-pyexcel-ods,
python-pyexcel-xls,
python-crypto,
python-feedparser
python-feedparser,
python-pdfrw
Recommends: python-soappy, python-phpserialize
Description: Uniform access to multiple data sources and services (Python module)

View File

@ -23,6 +23,9 @@ import os
from xml.etree import ElementTree as etree
import zipfile
import pdfrw
import pdfrw.findobjs
from Crypto.Cipher import AES
from django.conf import settings
@ -434,6 +437,13 @@ class AbstractCartaDSCS(BaseResource):
return []
signer = Signer(salt='cart@ds_cs')
tracking_code = signer.unsign(token)
if id_piece.startswith('cerfa-'):
try:
pdf = pdfrw.PdfReader(request.FILES['files[]'])
if not any(pdfrw.findobjs.find_objects(pdf, valid_subtypes=(pdfrw.PdfName.Form,))):
return [{'error': _('The CERFA should not be a scanned document.')}]
except pdfrw.PdfParseError:
return [{'error': _('The CERFA should be a PDF file.')}]
file_upload = CartaDSFile(
tracking_code=tracking_code,
id_piece=id_piece,

View File

@ -108,6 +108,7 @@ setup(name='passerelle',
'pycrypto',
'unidecode',
'paramiko',
'pdfrw',
],
cmdclass={
'build': build,

112
tests/data/pdf-form.pdf Normal file
View File

@ -0,0 +1,112 @@
%PDF-1.4
%“Œ‹ž ReportLab Generated PDF document http://www.reportlab.com
1 0 obj
<<
/F1 2 0 R
>>
endobj
2 0 obj
<<
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
>>
endobj
3 0 obj
<</Type /Encoding /Differences [24 /breve /caron /circumflex /dotaccent /hungarumlaut /ogonek /ring /tilde 39 /quotesingle 96 /grave 128 /bullet /dagger /daggerdbl /ellipsis /emdash /endash /florin /fraction /guilsinglleft /guilsinglright /minus /perthousand /quotedblbase /quotedblleft /quotedblright /quoteleft /quoteright /quotesinglbase /trademark /fi /fl /Lslash /OE /Scaron /Ydieresis /Zcaron /dotlessi /lslash /oe /scaron /zcaron 160 /Euro 164 /currency 166 /brokenbar 168 /dieresis /copyright /ordfeminine 172 /logicalnot /.notdef /registered /macron /degree /plusminus /twosuperior /threesuperior /acute /mu 183 /periodcentered /cedilla /onesuperior /ordmasculine 188 /onequarter /onehalf /threequarters 192 /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn /germandbls /agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex /idieresis /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide /oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn /ydieresis]>>
endobj
4 0 obj
<< /BaseFont /Helvetica /Subtype /Type1 /Name /Helv /Type /Font /Encoding 3 0 R >>
endobj
5 0 obj
<<
/BBox [ 0 0 300 36 ] /Filter [ /FlateDecode ] /FormType 1 /Length 171 /Matrix [ 1 0 0 1 0 0 ] /Resources << /ProcSet [/PDF /Text] /Font <</Helv 4 0 R>> >>
/Subtype /Form /Type /XObject
>>
stream
xœMOK
Â0ÜÏ)f©6Ÿ¦6ÛR7](/`¤¬ ß—~ <>7òf†LR0)2KÍñEE«drŽnHœÒÞYÞ¡E0Æû•´Y$ëeÅwpÆGÛ¢&¢e§'œuÑm¶)bԜϹé*qŒ³äȧÞHÃ<48>e[¯)ÇøœÖÄÕOi[4(Òcר Ã&WJf4òvÅnÏð@pBÝVø¨5endstream
endobj
6 0 obj
<<
/AP <<
/N 5 0 R
>> /BS <<
/S /I /W 1
>> /DA (/Helv 12 Tf .1 .1 .1 rg) /DV () /F 4 /FT /Tx
/Ff 0 /MK <<
/BC [ .1 .1 .1 ] /BG [ .8 .843 1 ]
>> /MaxLen 100 /P 7 0 R /Rect [ 110 635 410 671 ] /Subtype /Widget
/T (fname) /TU (First Name) /Type /Annot /V ()
>>
endobj
7 0 obj
<<
/Annots [ 6 0 R ] /Contents 11 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 10 0 R /Resources <<
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
>> /Rotate 0
/Trans <<
>> /Type /Page
>>
endobj
8 0 obj
<<
/AcroForm 12 0 R /PageMode /UseNone /Pages 10 0 R /Type /Catalog
>>
endobj
9 0 obj
<<
/Author (anonymous) /CreationDate (D:20190903184353+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20190903184353+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
/Subject (unspecified) /Title (untitled) /Trapped /False
>>
endobj
10 0 obj
<<
/Count 1 /Kids [ 7 0 R ] /Type /Pages
>>
endobj
11 0 obj
<<
/Filter [ /ASCII85Decode /FlateDecode ] /Length 126
>>
stream
Gaqck0a`Ou'F"A`MQ&2A5'L)8'f:o0@;c^9<<BGqqH78BhIBr$6rTs)+"nHeodnUUT.#"2021%_VSBG#BeHRGDrs4>a&%@t[l74FM(YU@Fr1LA@q2TV2=(]H6Pek~>endstream
endobj
12 0 obj
<<
/DA (/Helv 0 Tf 0 g) /DR << /Encoding
<<
/RLAFencoding
3 0 R
>>
/Font << /Helv 4 0 R >>
>> /Fields [ 6 0 R ]
>>
endobj
xref
0 13
0000000000 65535 f
0000000073 00000 n
0000000104 00000 n
0000000211 00000 n
0000001533 00000 n
0000001631 00000 n
0000002028 00000 n
0000002307 00000 n
0000002530 00000 n
0000002616 00000 n
0000002912 00000 n
0000002972 00000 n
0000003189 00000 n
trailer
<<
/ID
[<30d7bc8d9857e1dc55dd88e74a04dac2><30d7bc8d9857e1dc55dd88e74a04dac2>]
% ReportLab generated PDF document -- digest (http://www.reportlab.com)
/Info 9 0 R
/Root 8 0 R
/Size 13
>>
startxref
3320
%%EOF

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import datetime
import os
import mock
from httmock import HTTMock
@ -142,6 +143,16 @@ def test_pieces_management(connector, app, cached_data):
resp = app.post(data[0]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', '%PDF...')])
assert resp.json == [{'error': 'The CERFA should be a PDF file.'}]
pdf_contents = open(os.path.join(os.path.dirname(__file__), 'data', 'minimal.pdf')).read()
resp = app.post(data[0]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', pdf_contents)])
assert resp.json == [{'error': 'The CERFA should not be a scanned document.'}]
pdf_contents = open(os.path.join(os.path.dirname(__file__), 'data', 'pdf-form.pdf')).read()
resp = app.post(data[0]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', pdf_contents)])
cerfa_token = resp.json[0]['token']
resp = app.get('/cartads-cs/test/pieces?type_dossier_id=CU&objet_demande_id=1&tracking_code=BBBBBBBB')
@ -156,12 +167,12 @@ def test_pieces_management(connector, app, cached_data):
assert 'name' not in data[0]['files'][0]
resp = app.post(data[0]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', '%PDF...')])
upload_files=[('files[]', 'test.pdf', pdf_contents)])
resp = app.post(data[1]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', '%PDF...')])
upload_files=[('files[]', 'test.pdf', pdf_contents)])
resp = app.post(data[1]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', '%PDF...')])
upload_files=[('files[]', 'test.pdf', pdf_contents)])
resp = app.get('/cartads-cs/test/pieces?type_dossier_id=CU&objet_demande_id=1&tracking_code=BBBBBBBB')
data = resp.json['data']
assert len(data[1]['files']) == 3
@ -170,7 +181,7 @@ def test_pieces_management(connector, app, cached_data):
assert resp.json == {'result': False, 'err': 0}
resp = app.post(data[2]['files'][0]['url'],
upload_files=[('files[]', 'test.pdf', '%PDF...')])
upload_files=[('files[]', 'test.pdf', pdf_contents)])
resp = app.get('/cartads-cs/test/check_pieces?type_dossier_id=CU&objet_demande_id=1&tracking_code=BBBBBBBB')
assert resp.json == {'result': True, 'err': 0}