#35, a better fix: for csv, decode the file content when it is given as bytes

This commit is contained in:
chfw 2017-05-18 08:38:21 +01:00
parent 1204330085
commit c57ab64cbb
3 changed files with 15 additions and 14 deletions

View File

@ -8,7 +8,7 @@
:license: New BSD License, see LICENSE for more details
"""
import pyexcel_io.manager as manager
from pyexcel_io._compact import PY2, OrderedDict, isstream, StringIO
from pyexcel_io._compact import OrderedDict, isstream
from .constants import (
MESSAGE_ERROR_03,
MESSAGE_WRONG_IO_INSTANCE
@ -80,7 +80,7 @@ class BookReader(RWInterface):
else:
raise IOError(MESSAGE_WRONG_IO_INSTANCE)
def open_content(self, file_content, encoding='utf-8', **keywords):
def open_content(self, file_content, **keywords):
"""
read file content as if it is a file stream with
unlimited keywords for reading
@ -88,7 +88,7 @@ class BookReader(RWInterface):
keywords are passed on to individual readers
"""
file_stream = _convert_content_to_stream(
file_content, self._file_type, encoding=encoding)
file_content, self._file_type)
self.open_stream(file_stream, **keywords)
def read_sheet_by_name(self, sheet_name):
@ -171,15 +171,8 @@ class BookWriter(RWInterface):
raise NotImplementedError("Please implement create_sheet()")
def _convert_content_to_stream(file_content, file_type, encoding='utf-8'):
def _convert_content_to_stream(file_content, file_type):
io = manager.get_io(file_type)
if PY2:
io.write(file_content.decode(encoding))
else:
if (isinstance(io, StringIO) and isinstance(file_content, bytes)):
content = file_content.decode(encoding)
else:
content = file_content
io.write(content)
io.write(file_content)
io.seek(0)
return io

View File

@ -201,6 +201,10 @@ class CSVinMemoryReader(CSVSheetReader):
f = self._native_sheet.payload
else:
if isinstance(self._native_sheet.payload, compact.BytesIO):
# Please note:
# if the end developer feeds us a BytesIO in Python 3,
# we will convert it to a StringIO, but that
# conversion comes at a cost.
content = self._native_sheet.payload.read()
f = compact.StringIO(content.decode(self._encoding))
else:
@ -319,15 +323,19 @@ class CSVBookReader(BookReader):
def open_content(self, file_content, **keywords):
if compact.PY27_ABOVE:
import mmap
encoding = keywords.get('encoding', 'utf-8')
if isinstance(file_content, mmap.mmap):
# load from mmap
self.__multiple_sheets = keywords.get('multiple_sheets', False)
encoding = keywords.get('encoding', 'utf-8')
self._file_stream = CSVMemoryMapIterator(
file_content, encoding)
self._keywords = keywords
self._native_book = self._load_from_stream()
else:
if compact.PY3_ABOVE:
if isinstance(file_content, bytes):
file_content = file_content.decode(encoding)
# else: Python 2.7 does not distinguish between bytes and str
BookReader.open_content(
self, file_content, **keywords)
else:

View File

@ -145,7 +145,7 @@ def check_mmap_encoding(encoding):
def test_issue_35_encoding_for_file_content():
encoding = 'utf-8'
encoding = 'utf-16'
content = [
[u'Äkkilähdöt', u'Matkakirjoituksia', u'Matkatoimistot'],
[u'Äkkilähdöt', u'Matkakirjoituksia', u'Matkatoimistot']]