Revert "Use Latin-1-decoded strings instead of bytestrings in pdfrw streams"
This reverts commit a5bbc22627.
This commit is contained in:
parent
4624332fcd
commit
3275ff9670
|
@ -89,15 +89,13 @@ def _create_compressed_file_object(source):
|
|||
|
||||
pdf_file_object = PdfDict(
|
||||
Type=PdfName('EmbeddedFile'), Filter=PdfName('FlateDecode'))
|
||||
|
||||
# pdfrw needs Latin-1-decoded unicode strings in object.stream
|
||||
pdf_file_object.stream = ''
|
||||
pdf_file_object.stream = b''
|
||||
size = 0
|
||||
for data in iter(lambda: source.read(4096), b''):
|
||||
size += len(data)
|
||||
md5.update(data)
|
||||
pdf_file_object.stream += compress.compress(data).decode('latin-1')
|
||||
pdf_file_object.stream += compress.flush(zlib.Z_FINISH).decode('latin-1')
|
||||
pdf_file_object.stream += compress.compress(data)
|
||||
pdf_file_object.stream += compress.flush(zlib.Z_FINISH)
|
||||
pdf_file_object.Params = PdfDict(
|
||||
CheckSum=PdfString('<{}>'.format(md5.hexdigest())), Size=size)
|
||||
return pdf_file_object
|
||||
|
|
|
@ -15,7 +15,6 @@ from __future__ import division, unicode_literals
|
|||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import zlib
|
||||
|
||||
import cairocffi
|
||||
import pytest
|
||||
|
@ -428,39 +427,27 @@ def test_embedded_files():
|
|||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
embedded = pdf.Root.Names.EmbeddedFiles.Names
|
||||
|
||||
assert zlib.decompress(
|
||||
embedded[1].EF.F.stream.encode('latin-1')) == b'hi there'
|
||||
assert embedded[1].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(b'hi there').hexdigest()))
|
||||
assert embedded[1].F.decode() == ''
|
||||
assert embedded[1].UF.decode() == 'attachment.bin'
|
||||
assert embedded[1].Desc.decode() == 'some file attachment äöü'
|
||||
|
||||
assert zlib.decompress(
|
||||
embedded[3].EF.F.stream.encode('latin-1')) == b'12345678'
|
||||
assert embedded[3].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(adata).hexdigest()))
|
||||
assert embedded[3].UF.decode() == os.path.basename(absolute_tmp_file)
|
||||
|
||||
assert zlib.decompress(
|
||||
embedded[5].EF.F.stream.encode('latin-1')) == b'abcdefgh'
|
||||
assert embedded[5].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(rdata).hexdigest()))
|
||||
assert embedded[5].UF.decode() == os.path.basename(relative_tmp_file)
|
||||
|
||||
assert zlib.decompress(
|
||||
embedded[7].EF.F.stream.encode('latin-1')) == b'oob attachment'
|
||||
assert embedded[7].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(b'oob attachment').hexdigest()))
|
||||
assert embedded[7].Desc.decode() == 'Hello'
|
||||
|
||||
assert zlib.decompress(
|
||||
embedded[9].EF.F.stream.encode('latin-1')) == b'raw URL'
|
||||
assert embedded[9].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(b'raw URL').hexdigest()))
|
||||
|
||||
assert zlib.decompress(
|
||||
embedded[11].EF.F.stream.encode('latin-1')) == b'file like obj'
|
||||
assert embedded[11].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))
|
||||
|
||||
|
|
Loading…
Reference in New Issue