to_pdf: detect PDF with BOM prefix (#32088)
This commit is contained in:
parent
1eff11f7f3
commit
b00b070523
|
@ -14,6 +14,7 @@
|
|||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import codecs
|
||||
import re
|
||||
import unicodedata
|
||||
import warnings
|
||||
|
@ -24,7 +25,7 @@ from PIL import Image
|
|||
|
||||
|
||||
def to_pdf(content):
|
||||
if content.startswith('%PDF'):
|
||||
if content.startswith(('%PDF', codecs.BOM + '%PDF', codecs.BOM_UTF8 + '%PDF')):
|
||||
return content
|
||||
try:
|
||||
with warnings.catch_warnings():
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
%PDF-1.1
|
||||
%¥±ë
|
||||
|
||||
% MIT License
|
||||
%
|
||||
% Copyright (c) 2010 Brendan Zagaeski
|
||||
%
|
||||
% Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
% of this software and associated documentation files (the "Software"), to deal
|
||||
% in the Software without restriction, including without limitation the rights
|
||||
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
% copies of the Software, and to permit persons to whom the Software is
|
||||
% furnished to do so, subject to the following conditions:
|
||||
%
|
||||
% The above copyright notice and this permission notice shall be included in all
|
||||
% copies or substantial portions of the Software.
|
||||
%
|
||||
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
% SOFTWARE.
|
||||
|
||||
1 0 obj
|
||||
<< /Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<< /Type /Pages
|
||||
/Kids [3 0 R]
|
||||
/Count 1
|
||||
/MediaBox [0 0 300 144]
|
||||
>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<< /Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources
|
||||
<< /Font
|
||||
<< /F1
|
||||
<< /Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
>>
|
||||
>>
|
||||
/Contents 4 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<< /Length 55 >>
|
||||
stream
|
||||
BT
|
||||
/F1 18 Tf
|
||||
0 0 Td
|
||||
(Hello World) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 5
|
||||
0000000000 65535 f
|
||||
0000001130 00000 n
|
||||
0000001189 00000 n
|
||||
0000001290 00000 n
|
||||
0000001569 00000 n
|
||||
trailer
|
||||
<< /Root 1 0 R
|
||||
/Size 5
|
||||
>>
|
||||
startxref
|
||||
1677
|
||||
%%EOF
|
|
@ -0,0 +1,80 @@
|
|||
倥䙄ㄭㄮ
|
||||
숥슥쎱<EFBFBD>
|
||||
|
||||
‥䥍⁔楌散獮<EFBFBD>
|
||||
<EFBFBD>
|
||||
‥潃祰楲桧⁴挨
〲〱䈠敲摮湡娠条敡歳<EFBFBD>
|
||||
<EFBFBD>
|
||||
‥敐浲獩楳湯椠敨敲祢朠慲瑮摥牦敥漠档牡敧潴愠祮瀠牥潳扯慴湩湩潣祰
|
||||
‥景琠楨潳瑦慷敲愠摮愠獳捯慩整潤畣敭瑮瑡潩楦敬琨敨∠潓瑦慷敲⤢潴搠慥<EFBFBD>
|
||||
‥湩琠敨匠景睴牡楷桴畯⁴敲瑳楲瑣潩Ɱ椠据畬楤杮眠瑩潨瑵氠浩瑩瑡潩桴楲桧獴
|
||||
‥潴甠敳潣祰潭楤祦敭杲ⱥ瀠扵楬桳楤瑳楲畢整畳汢捩湥敳湡⽤牯猠汥<EFBFBD>
|
||||
‥潣楰獥漠桴潓瑦慷敲湡潴瀠牥業⁴数獲湯潴眠潨桴潓瑦慷敲椠<EFBFBD>
|
||||
‥畦湲獩敨潴搠潳畳橢捥⁴潴琠敨映汯潬楷杮挠湯楤楴湯㩳
|
||||
<EFBFBD>
|
||||
‥桔扡癯潣祰楲桧⁴潮楴散愠摮琠楨数浲獩楳湯渠瑯捩桳污敢椠据畬敤湩愠汬
|
||||
‥潣楰獥漠畳獢慴瑮慩潰瑲潩獮漠桴潓瑦慷敲<EFBFBD>
|
||||
<EFBFBD>
|
||||
‥䡔⁅体呆䅗䕒䤠⁓剐噏䑉䑅∠十䤠≓䥗䡔問⁔䅗剒乁奔传⁆乁⁙䥋䑎塅剐卅⁓剏
|
||||
‥䵉䱐䕉ⱄ䤠䍎啌䥄䝎䈠呕丠呏䰠䵉呉䑅吠⁏䡔⁅䅗剒乁䥔卅传⁆䕍䍒䅈呎䉁䱉呉ⱙ
|
||||
‥䥆乔卅⁓但⁒⁁䅐呒䍉䱕剁倠剕佐䕓䄠䑎丠乏义剆义䕇䕍呎义丠⁏噅久⁔䡓䱁⁌䡔<EFBFBD>
|
||||
‥啁䡔剏⁓剏䌠偏剙䝉呈䠠䱏䕄卒䈠⁅䥌䉁䕌䘠剏䄠奎䌠䅌䵉䅄䅍䕇⁓剏传䡔剅
|
||||
‥䥌䉁䱉呉ⱙ圠䕈䡔剅䤠⁎乁䄠呃佉⁎䙏䌠乏剔䍁ⱔ吠剏⁔剏传䡔剅䥗䕓剁卉义⁇剆䵏<EFBFBD>
|
||||
‥問⁔䙏传⁒义䌠乏䕎呃佉⁎䥗䡔吠䕈匠䙏坔剁⁅剏吠䕈唠䕓传⁒呏䕈⁒䕄䱁义升䤠⁎䡔<EFBFBD>
|
||||
‥体呆䅗䕒<EFBFBD>
|
||||
|
||||
‱‰扯<EFBFBD>
|
||||
†㰼⼠祔数⼠慃慴潬<EFBFBD>
|
||||
††⼠慐敧′‰<EFBFBD>
|
||||
†㸾
|
||||
湥潤橢
|
||||
|
||||
′‰扯<EFBFBD>
|
||||
†㰼⼠祔数⼠慐敧<EFBFBD>
|
||||
††⼠楋獤嬠″‰嵒
|
||||
††⼠潃湵⁴<EFBFBD>
|
||||
††⼠敍楤䉡硯嬠‰‰〳‰㐱崴
|
||||
†㸾
|
||||
湥潤橢
|
||||
|
||||
″‰扯<EFBFBD>
|
||||
†㰼†启灹倯条<EFBFBD>
|
||||
†††倯牡湥⁴′‰<EFBFBD>
|
||||
†††刯獥畯捲獥
|
||||
†††㰠‼䘯湯<EFBFBD>
|
||||
†††††㰠‼䘯<EFBFBD>
|
||||
†††††††㰠‼启灹䘯湯<EFBFBD>
|
||||
†††††††††匯扵祴数⼠祔数<EFBFBD>
|
||||
†††††††††䈯獡䙥湯⁴启浩獥刭浯湡
|
||||
†††††††㸠<EFBFBD>
|
||||
†††††㸠<EFBFBD>
|
||||
†††㸠<EFBFBD>
|
||||
†††䌯湯整瑮‴‰<EFBFBD>
|
||||
†㸾
|
||||
湥潤橢
|
||||
|
||||
‴‰扯<EFBFBD>
|
||||
†㰼⼠敌杮桴㔠‵㸾
|
||||
瑳敲浡
|
||||
†呂
|
||||
††䘯‱㠱吠<EFBFBD>
|
||||
††‰‰摔
|
||||
††䠨汥潬圠牯摬
橔
|
||||
†呅
|
||||
湥獤牴慥<EFBFBD>
|
||||
湥潤橢
|
||||
|
||||
牸晥
|
||||
‰<EFBFBD>
|
||||
〰〰〰〰〰㘠㔵㔳映<EFBFBD>
|
||||
〰〰〰ㄱ〳〠〰〰渠<EFBFBD>
|
||||
〰〰〰ㄱ㤸〠〰〰渠<EFBFBD>
|
||||
〰〰〰㈱〹〠〰〰渠<EFBFBD>
|
||||
〰〰〰㔱㤶〠〰〰渠<EFBFBD>
|
||||
牴楡敬<EFBFBD>
|
||||
†㰼†刯潯⁴‱‰<EFBFBD>
|
||||
†††匯穩<EFBFBD>
|
||||
†㸾
|
||||
瑳牡硴敲<EFBFBD>
|
||||
㘱㜷
|
||||
┥佅<EFBFBD>
|
|
@ -0,0 +1,80 @@
|
|||
%PDF-1.1
|
||||
%¥±ë
|
||||
|
||||
% MIT License
|
||||
%
|
||||
% Copyright (c) 2010 Brendan Zagaeski
|
||||
%
|
||||
% Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
% of this software and associated documentation files (the "Software"), to deal
|
||||
% in the Software without restriction, including without limitation the rights
|
||||
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
% copies of the Software, and to permit persons to whom the Software is
|
||||
% furnished to do so, subject to the following conditions:
|
||||
%
|
||||
% The above copyright notice and this permission notice shall be included in all
|
||||
% copies or substantial portions of the Software.
|
||||
%
|
||||
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
% SOFTWARE.
|
||||
|
||||
1 0 obj
|
||||
<< /Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<< /Type /Pages
|
||||
/Kids [3 0 R]
|
||||
/Count 1
|
||||
/MediaBox [0 0 300 144]
|
||||
>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<< /Type /Page
|
||||
/Parent 2 0 R
|
||||
/Resources
|
||||
<< /Font
|
||||
<< /F1
|
||||
<< /Type /Font
|
||||
/Subtype /Type1
|
||||
/BaseFont /Times-Roman
|
||||
>>
|
||||
>>
|
||||
>>
|
||||
/Contents 4 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<< /Length 55 >>
|
||||
stream
|
||||
BT
|
||||
/F1 18 Tf
|
||||
0 0 Td
|
||||
(Hello World) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 5
|
||||
0000000000 65535 f
|
||||
0000001130 00000 n
|
||||
0000001189 00000 n
|
||||
0000001290 00000 n
|
||||
0000001569 00000 n
|
||||
trailer
|
||||
<< /Root 1 0 R
|
||||
/Size 5
|
||||
>>
|
||||
startxref
|
||||
1677
|
||||
%%EOF
|
|
@ -0,0 +1,12 @@
|
|||
import os
|
||||
|
||||
from passerelle.utils.conversion import to_pdf
|
||||
|
||||
|
||||
def test_pdf_to_pdf_do_nothing():
    """Check that to_pdf() hands back content that is already a PDF.

    Three fixtures from the ``data`` directory next to this test module are
    read and passed to ``to_pdf``; each must come back unchanged:

    * ``minimal.pdf``
    * ``minimal_bom.pdf``
    * ``minimal_bomutf8.pdf``

    NOTE(review): judging by their names, the last two are presumably the
    same document prefixed with ``codecs.BOM`` and ``codecs.BOM_UTF8``
    respectively -- confirm against the fixture files.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    for filename in ('minimal.pdf', 'minimal_bom.pdf', 'minimal_bomutf8.pdf'):
        # PDF fixtures are binary data: read them in binary mode so no
        # newline translation or decoding is applied, and use a context
        # manager so the file handle is closed instead of leaked.
        with open(os.path.join(data_dir, filename), 'rb') as fd:
            pdf = fd.read()
        # The file name is the assertion message so a failure identifies
        # which fixture was altered.
        assert to_pdf(pdf) == pdf, filename
|
Loading…
Reference in New Issue