diff --git a/weasyprint/pdf.py b/weasyprint/pdf.py index 7115a234..aca3deca 100644 --- a/weasyprint/pdf.py +++ b/weasyprint/pdf.py @@ -89,13 +89,15 @@ def _create_compressed_file_object(source): pdf_file_object = PdfDict( Type=PdfName('EmbeddedFile'), Filter=PdfName('FlateDecode')) - pdf_file_object.stream = b'' + + # pdfrw needs Latin-1-decoded unicode strings in object.stream + pdf_file_object.stream = '' size = 0 for data in iter(lambda: source.read(4096), b''): size += len(data) md5.update(data) - pdf_file_object.stream += compress.compress(data) - pdf_file_object.stream += compress.flush(zlib.Z_FINISH) + pdf_file_object.stream += compress.compress(data).decode('latin-1') + pdf_file_object.stream += compress.flush(zlib.Z_FINISH).decode('latin-1') pdf_file_object.Params = PdfDict( CheckSum=PdfString('<{}>'.format(md5.hexdigest())), Size=size) return pdf_file_object diff --git a/weasyprint/tests/test_pdf.py b/weasyprint/tests/test_pdf.py index c1f7de49..27f0c2f5 100644 --- a/weasyprint/tests/test_pdf.py +++ b/weasyprint/tests/test_pdf.py @@ -15,6 +15,7 @@ from __future__ import division, unicode_literals import hashlib import io import os +import zlib import cairocffi import pytest @@ -427,27 +428,39 @@ def test_embedded_files(): pdf = PdfReader(fdata=pdf_bytes) embedded = pdf.Root.Names.EmbeddedFiles.Names + assert zlib.decompress( + embedded[1].EF.F.stream.encode('latin-1')) == b'hi there' assert embedded[1].EF.F.Params.CheckSum == ( '<{}>'.format(hashlib.md5(b'hi there').hexdigest())) assert embedded[1].F.decode() == '' assert embedded[1].UF.decode() == 'attachment.bin' assert embedded[1].Desc.decode() == 'some file attachment äöü' + assert zlib.decompress( + embedded[3].EF.F.stream.encode('latin-1')) == b'12345678' assert embedded[3].EF.F.Params.CheckSum == ( '<{}>'.format(hashlib.md5(adata).hexdigest())) assert embedded[3].UF.decode() == os.path.basename(absolute_tmp_file) + assert zlib.decompress( + embedded[5].EF.F.stream.encode('latin-1')) == b'abcdefgh' assert embedded[5].EF.F.Params.CheckSum == ( '<{}>'.format(hashlib.md5(rdata).hexdigest())) assert embedded[5].UF.decode() == os.path.basename(relative_tmp_file) + assert zlib.decompress( + embedded[7].EF.F.stream.encode('latin-1')) == b'oob attachment' assert embedded[7].EF.F.Params.CheckSum == ( '<{}>'.format(hashlib.md5(b'oob attachment').hexdigest())) assert embedded[7].Desc.decode() == 'Hello' + assert zlib.decompress( + embedded[9].EF.F.stream.encode('latin-1')) == b'raw URL' assert embedded[9].EF.F.Params.CheckSum == ( '<{}>'.format(hashlib.md5(b'raw URL').hexdigest())) + assert zlib.decompress( + embedded[11].EF.F.stream.encode('latin-1')) == b'file like obj' assert embedded[11].EF.F.Params.CheckSum == ( '<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))