Revert "Use Latin-1-decoded strings instead of bytestrings in pdfrw streams"

This reverts commit a5bbc22627.
This commit is contained in:
Guillaume Ayoub 2018-01-30 23:16:43 +01:00
parent 4624332fcd
commit 3275ff9670
2 changed files with 3 additions and 18 deletions

View File

@@ -89,15 +89,13 @@ def _create_compressed_file_object(source):
pdf_file_object = PdfDict(
Type=PdfName('EmbeddedFile'), Filter=PdfName('FlateDecode'))
# pdfrw needs Latin-1-decoded unicode strings in object.stream
pdf_file_object.stream = ''
pdf_file_object.stream = b''
size = 0
for data in iter(lambda: source.read(4096), b''):
size += len(data)
md5.update(data)
pdf_file_object.stream += compress.compress(data).decode('latin-1')
pdf_file_object.stream += compress.flush(zlib.Z_FINISH).decode('latin-1')
pdf_file_object.stream += compress.compress(data)
pdf_file_object.stream += compress.flush(zlib.Z_FINISH)
pdf_file_object.Params = PdfDict(
CheckSum=PdfString('<{}>'.format(md5.hexdigest())), Size=size)
return pdf_file_object

View File

@@ -15,7 +15,6 @@ from __future__ import division, unicode_literals
import hashlib
import io
import os
import zlib
import cairocffi
import pytest
@@ -428,39 +427,27 @@ def test_embedded_files():
pdf = PdfReader(fdata=pdf_bytes)
embedded = pdf.Root.Names.EmbeddedFiles.Names
assert zlib.decompress(
embedded[1].EF.F.stream.encode('latin-1')) == b'hi there'
assert embedded[1].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'hi there').hexdigest()))
assert embedded[1].F.decode() == ''
assert embedded[1].UF.decode() == 'attachment.bin'
assert embedded[1].Desc.decode() == 'some file attachment äöü'
assert zlib.decompress(
embedded[3].EF.F.stream.encode('latin-1')) == b'12345678'
assert embedded[3].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(adata).hexdigest()))
assert embedded[3].UF.decode() == os.path.basename(absolute_tmp_file)
assert zlib.decompress(
embedded[5].EF.F.stream.encode('latin-1')) == b'abcdefgh'
assert embedded[5].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(rdata).hexdigest()))
assert embedded[5].UF.decode() == os.path.basename(relative_tmp_file)
assert zlib.decompress(
embedded[7].EF.F.stream.encode('latin-1')) == b'oob attachment'
assert embedded[7].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'oob attachment').hexdigest()))
assert embedded[7].Desc.decode() == 'Hello'
assert zlib.decompress(
embedded[9].EF.F.stream.encode('latin-1')) == b'raw URL'
assert embedded[9].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'raw URL').hexdigest()))
assert zlib.decompress(
embedded[11].EF.F.stream.encode('latin-1')) == b'file like obj'
assert embedded[11].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))