#35, a better fix. For csv, decode content when it is given content in bytes

2017-05-18 08:38:21 +01:00 · 2017-05-18 08:38:21 +01:00 · c57ab64cbb
parent 1204330085
commit c57ab64cbb
3 changed files with 15 additions and 14 deletions
--- a/pyexcel_io/book.py
+++ b/pyexcel_io/book.py
@@ -8,7 +8,7 @@
    :license: New BSD License, see LICENSE for more details
 """
 import pyexcel_io.manager as manager
-from pyexcel_io._compact import PY2, OrderedDict, isstream, StringIO
+from pyexcel_io._compact import OrderedDict, isstream
 from .constants import (
    MESSAGE_ERROR_03,
    MESSAGE_WRONG_IO_INSTANCE
@@ -80,7 +80,7 @@ class BookReader(RWInterface):
        else:
            raise IOError(MESSAGE_WRONG_IO_INSTANCE)

-    def open_content(self, file_content, encoding='utf-8', **keywords):
+    def open_content(self, file_content, **keywords):
        """
        read file content as if it is a file stream with
        unlimited keywords for reading
@@ -88,7 +88,7 @@ class BookReader(RWInterface):
        keywords are passed on to individual readers
        """
        file_stream = _convert_content_to_stream(
-            file_content, self._file_type, encoding=encoding)
+            file_content, self._file_type)
        self.open_stream(file_stream, **keywords)

    def read_sheet_by_name(self, sheet_name):
@@ -171,15 +171,8 @@ class BookWriter(RWInterface):
        raise NotImplementedError("Please implement create_sheet()")


-def _convert_content_to_stream(file_content, file_type, encoding='utf-8'):
+def _convert_content_to_stream(file_content, file_type):
    io = manager.get_io(file_type)
-    if PY2:
-        io.write(file_content.decode(encoding))
-    else:
-        if (isinstance(io, StringIO) and isinstance(file_content, bytes)):
-            content = file_content.decode(encoding)
-        else:
-            content = file_content
-        io.write(content)
+    io.write(file_content)
    io.seek(0)
    return io
--- a/pyexcel_io/fileformat/_csv.py
+++ b/pyexcel_io/fileformat/_csv.py
@@ -201,6 +201,10 @@ class CSVinMemoryReader(CSVSheetReader):
                f = self._native_sheet.payload
        else:
            if isinstance(self._native_sheet.payload, compact.BytesIO):
+                # Please note:
+                # if the end developer feeds us BytesIO in Python 3,
+                # we will convert it to StringIO, but that
+                # conversion comes at a cost.
                content = self._native_sheet.payload.read()
                f = compact.StringIO(content.decode(self._encoding))
            else:
@@ -319,15 +323,19 @@ class CSVBookReader(BookReader):
    def open_content(self, file_content, **keywords):
        if compact.PY27_ABOVE:
            import mmap
+            encoding = keywords.get('encoding', 'utf-8')
            if isinstance(file_content, mmap.mmap):
                # load from mmap
                self.__multiple_sheets = keywords.get('multiple_sheets', False)
-                encoding = keywords.get('encoding', 'utf-8')
                self._file_stream = CSVMemoryMapIterator(
                    file_content, encoding)
                self._keywords = keywords
                self._native_book = self._load_from_stream()
            else:
+                if compact.PY3_ABOVE:
+                    if isinstance(file_content, bytes):
+                        file_content = file_content.decode(encoding)
+                # else: Python 2.7 does not distinguish between bytes and str
                BookReader.open_content(
                    self, file_content, **keywords)
        else:
--- a/tests/test_issues.py
+++ b/tests/test_issues.py
@@ -145,7 +145,7 @@ def check_mmap_encoding(encoding):


 def test_issue_35_encoding_for_file_content():
-    encoding = 'utf-8'
+    encoding = 'utf-16'
    content = [
        [u'Äkkilähdöt', u'Matkakirjoituksia', u'Matkatoimistot'],
        [u'Äkkilähdöt', u'Matkakirjoituksia', u'Matkatoimistot']]