👕 bleach the code using white

This commit is contained in:
chfw 2018-04-15 23:05:35 +01:00
parent 983bb7e013
commit 47915d1cbc
27 changed files with 485 additions and 342 deletions

View File

@ -9,17 +9,17 @@
"""
import logging
from ._compact import NullHandler
logging.getLogger(__name__).addHandler(NullHandler()) # noqa
from .io import get_data, iget_data, save_data # noqa
import pyexcel_io.plugins as plugins
# The three values below are only handed over to ``plugins.load_plugins``.
# NOTE(review): presumably the black list names modules the scan must skip
# and the white list names modules that are always loaded - confirm.
BLACK_LIST = [__name__, "pyexcel_webio", "pyexcel_text"]
WHITE_LIST = [
    "pyexcel_io.readers", "pyexcel_io.writers", "pyexcel_io.database"
]
PREFIX = "pyexcel_"
plugins.load_plugins(PREFIX, __path__, BLACK_LIST, WHITE_LIST)

View File

@ -31,22 +31,29 @@ else:
try:
from logging import NullHandler
except ImportError:
class NullHandler(logging.Handler):
def emit(self, record):
pass
if PY2:
from cStringIO import StringIO
from cStringIO import StringIO as BytesIO
text_type = unicode
irange = xrange
class Iterator(object):
def next(self):
return type(self).__next__(self)
else:
from io import StringIO, BytesIO
text_type = str
Iterator = object
irange = range
@ -56,19 +63,22 @@ def isstream(instance):
""" check if a instance is a stream """
try:
import mmap
i_am_not_mmap_obj = not isinstance(instance, mmap.mmap)
except ImportError:
# Python 2.6 or Google App Engine
i_am_not_mmap_obj = True
return hasattr(instance, 'read') and i_am_not_mmap_obj
return hasattr(instance, "read") and i_am_not_mmap_obj
def is_string(atype):
    """find out if a type is str or not

    :param atype: the type object to inspect
    :returns: True when ``atype`` equals ``str``, or equals ``unicode``
              while ``PY2`` is set; False otherwise
    """
    if atype == str:
        return True
    # ``unicode`` is only looked up behind the PY2 guard
    if PY2 and atype == unicode:
        return True
    return False

View File

@ -9,10 +9,7 @@
"""
import pyexcel_io.manager as manager
from pyexcel_io._compact import OrderedDict, isstream, PY2
from .constants import (
MESSAGE_ERROR_03,
MESSAGE_WRONG_IO_INSTANCE
)
from .constants import MESSAGE_ERROR_03, MESSAGE_WRONG_IO_INSTANCE
class RWInterface(object):
@ -51,6 +48,7 @@ class RWInterface(object):
pass
# implement context manager
def __enter__(self):
return self
@ -62,6 +60,7 @@ class BookReader(RWInterface):
"""
Standard book reader
"""
def __init__(self):
super(BookReader, self).__init__()
self._file_name = None
@ -86,14 +85,15 @@ class BookReader(RWInterface):
"""
if isstream(file_stream):
if PY2:
if hasattr(file_stream, 'seek'):
if hasattr(file_stream, "seek"):
file_stream.seek(0)
else:
# python 2
# Hei zipfile in odfpy would do a seek
# but stream from urlib cannot do seek
file_stream = _convert_content_to_stream(
file_stream.read(), self._file_type)
file_stream.read(), self._file_type
)
else:
from io import UnsupportedOperation
@ -102,7 +102,8 @@ class BookReader(RWInterface):
except UnsupportedOperation:
# python 3
file_stream = _convert_content_to_stream(
file_stream.read(), self._file_type)
file_stream.read(), self._file_type
)
self._file_stream = file_stream
self._keywords = keywords
@ -116,18 +117,21 @@ class BookReader(RWInterface):
keywords are passed on to individual readers
"""
file_stream = _convert_content_to_stream(
file_content, self._file_type)
file_stream = _convert_content_to_stream(file_content, self._file_type)
self.open_stream(file_stream, **keywords)
def read_sheet_by_name(self, sheet_name):
    """
    read a named sheet from an excel data book

    :param sheet_name: name compared against ``name`` of every entry
                       in ``self._native_book``
    :returns: a one-entry dictionary mapping the sheet name to the
              result of ``self.read_sheet``
    :raises ValueError: unless exactly one entry carries that name
    """
    matches = [
        candidate
        for candidate in self._native_book
        if candidate.name == sheet_name
    ]
    # zero matches and several matches are both reported the same way
    if len(matches) != 1:
        raise ValueError("Cannot find sheet %s" % sheet_name)
    return {matches[0].name: self.read_sheet(matches[0])}
@ -138,6 +142,7 @@ class BookReader(RWInterface):
try:
sheet = self._native_book[sheet_index]
return {sheet.name: self.read_sheet(sheet)}
except IndexError:
self.close()
raise
@ -174,6 +179,7 @@ class BookWriter(RWInterface):
"""
Standard book writer
"""
def __init__(self):
super(BookWriter, self).__init__()
self._file_alike_object = None
@ -196,6 +202,7 @@ class BookWriter(RWInterface):
"""
if not isstream(file_stream):
raise IOError(MESSAGE_ERROR_03)
self.open(file_stream, **keywords)
def open_content(self, file_stream, **keywords):

View File

@ -8,9 +8,9 @@
:license: New BSD License
"""
# flake8: noqa
# Base name; the two defaults below are derived from it.
DEFAULT_NAME = "pyexcel"
DEFAULT_SHEET_NAME = "%s_sheet1" % DEFAULT_NAME
DEFAULT_PLUGIN_NAME = "__%s_io_plugins__" % DEFAULT_NAME

# Message texts.
MESSAGE_INVALID_PARAMETERS = "Invalid parameters"
MESSAGE_ERROR_02 = "No content, file name. Nothing is given"
@ -26,27 +26,26 @@ MESSAGE_IGNORE_ROW = "One row is ignored"
MESSAGE_DB_EXCEPTION = """
Warning: Bulk insertion got below exception. Trying to do it one by one slowly."""
# File type names.
FILE_FORMAT_CSV = "csv"
FILE_FORMAT_TSV = "tsv"
FILE_FORMAT_CSVZ = "csvz"
FILE_FORMAT_TSVZ = "tsvz"
FILE_FORMAT_ODS = "ods"
FILE_FORMAT_XLS = "xls"
FILE_FORMAT_XLSX = "xlsx"
FILE_FORMAT_XLSM = "xlsm"
# Names used for the database backed pseudo file types.
DB_SQL = "sql"
DB_DJANGO = "django"
KEYWORD_TSV_DIALECT = "excel-tab"
KEYWORD_LINE_TERMINATOR = "lineterminator"

# Integer codes.
SKIP_DATA = -1
TAKE_DATA = 0
STOP_ITERATION = 1
DEFAULT_MULTI_CSV_SEPARATOR = "__"
# "%" binds tighter than "+": only DEFAULT_NAME is substituted here and the
# trailing "%s" placeholders are left in the resulting templates.
SEPARATOR_FORMATTER = "---%s---" % DEFAULT_NAME + "%s"
SEPARATOR_MATCHER = "---%s:(.*)---" % DEFAULT_NAME
DEFAULT_CSV_STREAM_FILE_FORMATTER = "---%s:" % DEFAULT_NAME + "%s---%s"
DEFAULT_CSV_NEWLINE = "\r\n"

View File

@ -12,15 +12,15 @@ from pyexcel_io.constants import DB_DJANGO, DB_SQL
# Declare two reader and two writer plugin classes of this package, each
# with the single file type it is declared for.
IOPluginInfoChain(__name__).add_a_reader(
    relative_plugin_class_path="exporters.django.DjangoBookReader",
    file_types=[DB_DJANGO],
).add_a_reader(
    relative_plugin_class_path="exporters.sqlalchemy.SQLBookReader",
    file_types=[DB_SQL],
).add_a_writer(
    relative_plugin_class_path="importers.django.DjangoBookWriter",
    file_types=[DB_DJANGO],
).add_a_writer(
    relative_plugin_class_path="importers.sqlalchemy.SQLBookWriter",
    file_types=[DB_SQL],
)

View File

@ -12,6 +12,7 @@ from pyexcel_io.book import BookReader
class DbExporter(BookReader):
""" Transcode the book reader interface to db interface """
def open(self, file_name, **keywords):
self.export_tables(self, file_name, **keywords)
@ -28,6 +29,7 @@ class DbExporter(BookReader):
class DjangoModelExportAdapter(object):
""" django export parameter holder """
def __init__(self, model, export_columns=None):
self.model = model
self.export_columns = export_columns
@ -44,8 +46,10 @@ class DjangoModelExportAdapter(object):
class DjangoModelImportAdapter(DjangoModelExportAdapter):
""" parameter holder for django data import """
class InOutParameter(object):
""" local class to manipulate variable io """
def __init__(self):
self.output = None
self.input = None
@ -102,7 +106,8 @@ class DjangoModelImportAdapter(DjangoModelExportAdapter):
if self.__column_names.input:
self.__column_names.output = [
self.__column_name_mapping_dict.input[name]
for name in self.__column_names.input]
for name in self.__column_names.input
]
self.__column_name_mapping_dict.output = None
if self.__column_names.output is None:
self.__column_names.output = self.__column_names.input
@ -110,6 +115,7 @@ class DjangoModelImportAdapter(DjangoModelExportAdapter):
class DjangoModelExporter(object):
""" public interface for django model export """
def __init__(self):
self.adapters = []
@ -120,6 +126,7 @@ class DjangoModelExporter(object):
class DjangoModelImporter(object):
""" public interface for django model import """
def __init__(self):
self.__adapters = {}
@ -134,26 +141,29 @@ class DjangoModelImporter(object):
class SQLTableExportAdapter(DjangoModelExportAdapter):
    """ parameter holder for sql table data export """

    def __init__(self, model, export_columns=None):
        DjangoModelExportAdapter.__init__(self, model, export_columns)
        # the same object is additionally kept under the name ``table``
        self.table = model

    def get_name(self):
        """Return ``__tablename__`` of the table, or None when it is unset"""
        return getattr(self.table, "__tablename__", None)
class SQLTableImportAdapter(DjangoModelImportAdapter):
    """ parameter holder for sqlalchemy table import """

    def __init__(self, model):
        DjangoModelImportAdapter.__init__(self, model)
        # the same object is additionally kept under the name ``table``
        self.table = model

    def get_name(self):
        """Return ``__tablename__`` of the table, or None when it is unset"""
        return getattr(self.table, "__tablename__", None)
class SQLTableExporter(DjangoModelExporter):
""" public interface for sql table export """
def __init__(self, session):
DjangoModelExporter.__init__(self)
self.session = session
@ -161,6 +171,7 @@ class SQLTableExporter(DjangoModelExporter):
class SQLTableImporter(DjangoModelImporter):
""" public interface to do data import via sqlalchemy """
def __init__(self, session):
DjangoModelImporter.__init__(self)
self.session = session

View File

@ -14,21 +14,23 @@ from pyexcel_io.database.querysets import QuerysetsReader
class DjangoModelReader(QuerysetsReader):
    """Read from django model
    """

    def __init__(self, model, export_columns=None, **keywords):
        """
        :param model: object providing ``objects.all()`` and
                      ``_meta.concrete_fields``
        :param export_columns: column names to use; when falsy, the sorted
                               ``attname`` of every concrete field is used
        :param keywords: passed on to ``QuerysetsReader.__init__``
        """
        self.__model = model
        if export_columns:
            column_names = export_columns
        else:
            column_names = sorted(
                [field.attname for field in self.__model._meta.concrete_fields]
            )
        QuerysetsReader.__init__(
            self, self.__model.objects.all(), column_names, **keywords
        )
class DjangoBookReader(DbExporter):
""" read django models """
def __init__(self):
DbExporter.__init__(self)
self.exporter = None
@ -38,8 +40,9 @@ class DjangoBookReader(DbExporter):
self._load_from_django_models()
def read_sheet(self, native_sheet):
    """Read one sheet through a ``DjangoModelReader``.

    :param native_sheet: object whose ``model`` and ``export_columns``
                         attributes are handed to the reader
    :returns: the result of the reader's ``to_array()``
    """
    model_reader = DjangoModelReader(
        native_sheet.model, native_sheet.export_columns
    )
    return model_reader.to_array()
def _load_from_django_models(self):

View File

@ -14,6 +14,7 @@ from pyexcel_io.database.querysets import QuerysetsReader
class SQLTableReader(QuerysetsReader):
"""Read a table
"""
def __init__(self, session, table, export_columns=None, **keywords):
everything = session.query(table).all()
column_names = None
@ -21,14 +22,19 @@ class SQLTableReader(QuerysetsReader):
column_names = export_columns
else:
if len(everything) > 0:
column_names = sorted([
column for column in everything[0].__dict__
if column != '_sa_instance_state'])
column_names = sorted(
[
column
for column in everything[0].__dict__
if column != "_sa_instance_state"
]
)
QuerysetsReader.__init__(self, everything, column_names, **keywords)
class SQLBookReader(DbExporter):
""" read a table via sqlalchemy """
def __init__(self):
DbExporter.__init__(self)
self.__exporter = None
@ -41,7 +47,8 @@ class SQLBookReader(DbExporter):
reader = SQLTableReader(
self.__exporter.session,
native_sheet.table,
native_sheet.export_columns)
native_sheet.export_columns,
)
return reader.to_array()
def _load_from_tables(self):

View File

@ -19,8 +19,8 @@ log = logging.getLogger(__name__)
class DjangoModelWriter(SheetWriter):
""" import data into a django model """
def __init__(self, importer, adapter, batch_size=None,
bulk_save=True):
def __init__(self, importer, adapter, batch_size=None, bulk_save=True):
SheetWriter.__init__(self, importer, adapter, adapter.name)
self.__batch_size = batch_size
self.__model = adapter.model
@ -39,16 +39,20 @@ class DjangoModelWriter(SheetWriter):
if self.__initializer is not None:
model_to_be_created = self.__initializer(new_array)
if model_to_be_created:
self.__objs.append(self.__model(**dict(
zip(self.__column_names, model_to_be_created)
)))
# else
# skip the row
self.__objs.append(
self.__model(
**dict(zip(self.__column_names, model_to_be_created))
)
)
# else
# skip the row
def close(self):
if self.__bulk_save:
self.__model.objects.bulk_create(
self.__objs, batch_size=self.__batch_size)
self.__objs, batch_size=self.__batch_size
)
else:
for an_object in self.__objs:
an_object.save()
@ -56,6 +60,7 @@ class DjangoModelWriter(SheetWriter):
class DjangoBookWriter(BookWriter):
""" write data into django models """
def __init__(self):
BookWriter.__init__(self)
self.__importer = None
@ -69,12 +74,16 @@ class DjangoBookWriter(BookWriter):
model = self.__importer.get(sheet_name)
if model:
sheet_writer = DjangoModelWriter(
self.__importer, model,
batch_size=self._keywords.get('batch_size', None),
bulk_save=self._keywords.get('bulk_save', True)
self.__importer,
model,
batch_size=self._keywords.get("batch_size", None),
bulk_save=self._keywords.get("bulk_save", True),
)
else:
raise Exception(
"Sheet: %s does not match any given models." % sheet_name +
"Please be aware of case sensitivity.")
"Sheet: %s does not match any given models."
% sheet_name
+ "Please be aware of case sensitivity."
)
return sheet_writer

View File

@ -24,10 +24,13 @@ class PyexcelSQLSkipRowException(Exception):
class SQLTableWriter(SheetWriter):
"""Write to a table
"""
def __init__(self, importer, adapter, auto_commit=True,
bulk_size=1000, **keywords):
SheetWriter.__init__(self, importer, adapter,
adapter.get_name(), **keywords)
def __init__(
self, importer, adapter, auto_commit=True, bulk_size=1000, **keywords
):
SheetWriter.__init__(
self, importer, adapter, adapter.get_name(), **keywords
)
self.__auto_commit = auto_commit
self.__count = 0
self.__bulk_size = bulk_size
@ -59,7 +62,7 @@ class SQLTableWriter(SheetWriter):
key = name
setattr(obj, key, row[name])
self._native_book.session.add(obj)
if self.__auto_commit and self.__bulk_size != float('inf'):
if self.__auto_commit and self.__bulk_size != float("inf"):
self.__count += 1
if self.__count % self.__bulk_size == 0:
self._native_book.session.commit()
@ -71,6 +74,7 @@ class SQLTableWriter(SheetWriter):
class SQLBookWriter(BookWriter):
""" write data into database tables via sqlalchemy """
def __init__(self):
BookWriter.__init__(self)
self.__importer = None
@ -85,11 +89,13 @@ class SQLBookWriter(BookWriter):
adapter = self.__importer.get(sheet_name)
if adapter:
sheet_writer = SQLTableWriter(
self.__importer, adapter,
auto_commit=self.__auto_commit
self.__importer, adapter, auto_commit=self.__auto_commit
)
else:
raise Exception(
"Sheet: %s does not match any given tables." % sheet_name +
"Please be aware of case sensitivity.")
"Sheet: %s does not match any given tables."
% sheet_name
+ "Please be aware of case sensitivity."
)
return sheet_writer

View File

@ -15,6 +15,7 @@ from pyexcel_io.sheet import SheetReader
class QuerysetsReader(SheetReader):
""" turn querysets into an array """
def __init__(self, query_sets, column_names, **keywords):
SheetReader.__init__(self, query_sets, **keywords)
self.__column_names = column_names
@ -26,12 +27,12 @@ class QuerysetsReader(SheetReader):
"""
if len(self.__query_sets) == 0:
yield []
for element in SheetReader.to_array(self):
yield element
def row_iterator(self):
    """Chain a single row holding the column names with the query sets."""
    header_rows = [self.__column_names]
    return chain(header_rows, self.__query_sets)
def column_iterator(self, row):
if self.__column_names is None:
@ -40,20 +41,19 @@ class QuerysetsReader(SheetReader):
if isinstance(row, list):
for element in row:
yield element
else:
for column in self.__column_names:
if '__' in column:
value = get_complex_attribute(
row, column)
if "__" in column:
value = get_complex_attribute(row, column)
else:
value = get_simple_attribute(
row, column)
value = get_simple_attribute(row, column)
yield value
def get_complex_attribute(row, attribute):
""" recursively get an attribute """
attributes = attribute.split('__')
attributes = attribute.split("__")
value = row
try:
for attributee in attributes:

View File

@ -36,7 +36,8 @@ def iget_data(afile, file_type=None, **keywords):
:returns: an ordered dictionary
"""
data, reader = _get_data(
afile, file_type=file_type, streaming=True, **keywords)
afile, file_type=file_type, streaming=True, **keywords
)
return data, reader
@ -60,26 +61,26 @@ def get_data(afile, file_type=None, streaming=None, **keywords):
:returns: an ordered dictionary
"""
if streaming is not None and streaming is True:
warnings.warn('Please use iget_data instead')
data, _ = _get_data(afile, file_type=file_type,
streaming=False, **keywords)
warnings.warn("Please use iget_data instead")
data, _ = _get_data(
afile, file_type=file_type, streaming=False, **keywords
)
return data
def _get_data(afile, file_type=None, **keywords):
    """Work out how ``afile`` should be passed on and call ``load_data``.

    * a stream is passed as ``file_stream``; a missing file type
      falls back to ``constants.FILE_FORMAT_CSV``
    * when ``afile`` or ``file_type`` is None, ``afile`` is passed as
      ``file_name``
    * anything else is passed as ``file_content``

    :param afile: a stream, a file name or file content
    :param file_type: file type, may be None
    :param keywords: further keyword arguments for ``load_data``
    :returns: whatever ``load_data`` returns
    """
    if isstream(afile):
        keywords.update(
            file_stream=afile,
            file_type=file_type or constants.FILE_FORMAT_CSV,
        )
    elif afile is None or file_type is None:
        keywords.update(file_name=afile, file_type=file_type)
    else:
        keywords.update(file_content=afile, file_type=file_type)
    return load_data(**keywords)
@ -113,10 +114,13 @@ def save_data(afile, data, file_type=None, **keywords):
if no_file_type:
file_type = constants.FILE_FORMAT_CSV
store_data(afile, to_store,
file_type=file_type,
single_sheet_in_book=single_sheet_in_book,
**keywords)
store_data(
afile,
to_store,
file_type=file_type,
single_sheet_in_book=single_sheet_in_book,
**keywords
)
def store_data(afile, data, file_type=None, **keywords):
@ -128,29 +132,25 @@ def store_data(afile, data, file_type=None, **keywords):
:param keywords: any other parameters
"""
if isstream(afile):
keywords.update(dict(
file_stream=afile,
file_type=file_type
))
keywords.update(dict(file_stream=afile, file_type=file_type))
else:
keywords.update(dict(
file_name=afile,
file_type=file_type
))
keywords.update(dict(file_name=afile, file_type=file_type))
with get_writer(**keywords) as writer:
writer.write(data)
def load_data(file_name=None,
file_content=None,
file_stream=None,
file_type=None,
sheet_name=None,
sheet_index=None,
sheets=None,
library=None,
streaming=False,
**keywords):
def load_data(
file_name=None,
file_content=None,
file_stream=None,
file_type=None,
sheet_name=None,
sheet_index=None,
sheets=None,
library=None,
streaming=False,
**keywords
):
"""Load data from any supported excel formats
:param filename: actual file name, a file stream or actual content
@ -164,6 +164,7 @@ def load_data(file_name=None,
number_of_none_inputs = [x for x in inputs if x is not None]
if len(number_of_none_inputs) != 1:
raise IOError(constants.MESSAGE_ERROR_02)
if file_type is None:
try:
file_type = file_name.split(".")[-1]
@ -194,20 +195,23 @@ def load_data(file_name=None,
return result, reader
def get_writer(file_name=None, file_stream=None,
file_type=None, library=None, **keywords):
def get_writer(
file_name=None, file_stream=None, file_type=None, library=None, **keywords
):
"""find a suitable writer"""
inputs = [file_name, file_stream]
number_of_none_inputs = [x for x in inputs if x is not None]
if len(number_of_none_inputs) != 1:
raise IOError(constants.MESSAGE_ERROR_02)
file_type_given = True
if file_type is None and file_name:
try:
file_type = file_name.split(".")[-1]
except AttributeError:
raise Exception("file_name should be a string type")
file_type_given = False
writer = WRITERS.get_a_plugin(file_type, library)

View File

@ -20,9 +20,9 @@ def register_stream_type(file_type, stream_type):
"""
keep track of stream type for different file formats
"""
if stream_type == 'text':
if stream_type == "text":
TEXT_STREAM_TYPES.append(file_type)
elif stream_type == 'binary':
elif stream_type == "binary":
BINARY_STREAM_TYPES.append(file_type)
@ -38,8 +38,10 @@ def get_io(file_type):
if __file_type in TEXT_STREAM_TYPES:
return StringIO()
elif __file_type in BINARY_STREAM_TYPES:
return BytesIO()
else:
return None
@ -56,8 +58,10 @@ def get_io_type(file_type):
if __file_type in TEXT_STREAM_TYPES:
return "string"
elif __file_type in BINARY_STREAM_TYPES:
return "bytes"
else:
return None

View File

@ -20,12 +20,13 @@ import pyexcel_io.constants as constants
ERROR_MESSAGE_FORMATTER = "one of these plugins for %s data in '%s': %s"
UPGRADE_MESSAGE = "Please upgrade the plugin '%s' according to \
plugin compactibility table."
READER_PLUGIN = 'pyexcel-io reader'
WRITER_PLUGIN = 'pyexcel-io writer'
READER_PLUGIN = "pyexcel-io reader"
WRITER_PLUGIN = "pyexcel-io writer"
class IOPluginInfo(PluginInfo):
    """Pyexcel-io plugin info description"""

    def tags(self):
        """Yield every entry of ``self.file_types`` in order."""
        for supported_type in self.file_types:
            yield supported_type
@ -33,35 +34,47 @@ class IOPluginInfo(PluginInfo):
class IOPluginInfoChain(PluginInfoChain):
    """provide custom functions to add a reader and a writer """

    def add_a_reader(
        self,
        relative_plugin_class_path=None,
        file_types=None,
        stream_type=None,
    ):
        """add pyexcel-io reader plugin info

        :param relative_plugin_class_path: class path, resolved through
                                           ``self._get_abs_path``
        :param file_types: file types stored on the plugin info
        :param stream_type: stream type stored on the plugin info
        :returns: whatever ``self.add_a_plugin_instance`` returns
        """
        plugin_path = self._get_abs_path(relative_plugin_class_path)
        reader_info = IOPluginInfo(
            READER_PLUGIN,
            plugin_path,
            file_types=file_types,
            stream_type=stream_type,
        )
        return self.add_a_plugin_instance(reader_info)

    def add_a_writer(
        self,
        relative_plugin_class_path=None,
        file_types=None,
        stream_type=None,
    ):
        """add pyexcel-io writer plugin info

        :param relative_plugin_class_path: class path, resolved through
                                           ``self._get_abs_path``
        :param file_types: file types stored on the plugin info
        :param stream_type: stream type stored on the plugin info
        :returns: whatever ``self.add_a_plugin_instance`` returns
        """
        plugin_path = self._get_abs_path(relative_plugin_class_path)
        writer_info = IOPluginInfo(
            WRITER_PLUGIN,
            plugin_path,
            file_types=file_types,
            stream_type=stream_type,
        )
        return self.add_a_plugin_instance(writer_info)
class IOManager(PluginManager):
"""Manage pyexcel-io plugins"""
def __init__(self, plugin_type, known_list):
PluginManager.__init__(self, plugin_type)
self.known_plugins = known_list
self.action = 'read'
self.action = "read"
if self.plugin_name == WRITER_PLUGIN:
self.action = 'write'
self.action = "write"
def load_me_later(self, plugin_info):
PluginManager.load_me_later(self, plugin_info)
@ -85,28 +98,32 @@ class IOManager(PluginManager):
message = "Please install "
if len(plugins) > 1:
message += ERROR_MESSAGE_FORMATTER % (
self.action, file_type, ','.join(plugins))
self.action, file_type, ",".join(plugins)
)
else:
message += plugins[0]
raise exceptions.SupportingPluginAvailableButNotInstalled(message)
else:
raise exceptions.NoSupportingPluginFound(
"No suitable library found for %s" % file_type)
"No suitable library found for %s" % file_type
)
def get_all_formats(self):
    """ return all supported formats

    :returns: a new set with the keys of ``self.registry`` and of
              ``self.known_plugins``, minus ``constants.DB_SQL`` and
              ``constants.DB_DJANGO``
    """
    all_formats = set(self.registry.keys())
    all_formats.update(self.known_plugins.keys())
    # the two database names are never reported as formats
    all_formats.difference_update([constants.DB_SQL, constants.DB_DJANGO])
    return all_formats
def _do_additional_registration(plugin_info):
    """Register every tag of ``plugin_info`` with ``manager``.

    For each file type yielded by ``plugin_info.tags()`` the stream type
    is registered first, then the file type itself.
    """
    for tagged_type in plugin_info.tags():
        manager.register_stream_type(tagged_type, plugin_info.stream_type)
        manager.register_a_file_type(
            tagged_type, plugin_info.stream_type, None
        )
READERS = IOManager(READER_PLUGIN, ioutils.AVAILABLE_READERS)
@ -116,5 +133,5 @@ WRITERS = IOManager(WRITER_PLUGIN, ioutils.AVAILABLE_WRITERS)
def load_plugins(prefix, path, black_list, white_list):
    """Try to discover all pyexcel-io plugins

    All four arguments are forwarded unchanged, in this order, to
    ``scan_plugins``.
    """
    # NOTE(review): the original carried a commented-out
    # ``constants.DEFAULT_PLUGIN_NAME`` beside ``prefix``; presumably an
    # earlier value for that argument - confirm before dropping this note.
    scan_plugins(prefix, path, black_list, white_list)

View File

@ -11,19 +11,19 @@ from pyexcel_io.plugins import IOPluginInfoChain
# Declare the four reader classes of this package: csv and tsv with a
# "text" stream type, their zipped variants with a "binary" stream type.
IOPluginInfoChain(__name__).add_a_reader(
    relative_plugin_class_path="csvr.CSVBookReader",
    file_types=["csv"],
    stream_type="text",
).add_a_reader(
    relative_plugin_class_path="tsv.TSVBookReader",
    file_types=["tsv"],
    stream_type="text",
).add_a_reader(
    relative_plugin_class_path="csvz.CSVZipBookReader",
    file_types=["csvz"],
    stream_type="binary",
).add_a_reader(
    relative_plugin_class_path="tsvz.TSVZipBookReader",
    file_types=["tsvz"],
    stream_type="binary",
)

View File

@ -20,14 +20,15 @@ import pyexcel_io.constants as constants
import pyexcel_io.service as service
DEFAULT_SEPARATOR = "__"
# "%" binds tighter than "+": only the default name is substituted here and
# the trailing "%s" placeholders are left in the resulting templates.
DEFAULT_SHEET_SEPARATOR_FORMATTER = "---%s---" % constants.DEFAULT_NAME + "%s"
SEPARATOR_MATCHER = "---%s:(.*)---" % constants.DEFAULT_NAME
DEFAULT_CSV_STREAM_FILE_FORMATTER = (
    "---%s:" % constants.DEFAULT_NAME + "%s---%s"
)
DEFAULT_NEWLINE = "\r\n"
# Two-byte byte order marks: U+FEFF encoded as UTF-16 little / big endian.
BOM_LITTLE_ENDIAN = b"\xff\xfe"
# The second byte was previously written as "\ff" (form feed followed by
# the letter "f"), which produced a three-byte value that could never equal
# a two-byte header slice.
BOM_BIG_ENDIAN = b"\xfe\xff"
# Byte order codes.
LITTLE_ENDIAN = 0
BIG_ENDIAN = 1
@ -39,30 +40,31 @@ class CSVMemoryMapIterator(compact.Iterator):
mmap object does not handle encoding at all. This class
provide the necessary transcoding for utf-8, utf-16 and utf-32
"""
def __init__(self, mmap_obj, encoding):
self.__mmap_obj = mmap_obj
self.__encoding = encoding
self.__count = 0
self.__endian = LITTLE_ENDIAN
if encoding == 'utf-8':
if encoding == "utf-8":
# ..\r\x00\n
# \x00\x..
self.__zeros_left_in_2_row = 0
elif encoding == 'utf-16':
elif encoding == "utf-16":
# ..\r\x00\n
# \x00\x..
self.__zeros_left_in_2_row = 1
elif encoding == 'utf-32':
elif encoding == "utf-32":
# \r\x00\x00\x00\n
# \x00\x00\x00\x..
self.__zeros_left_in_2_row = 3
elif encoding == 'utf-32-be' or encoding == 'utf-16-be':
elif encoding == "utf-32-be" or encoding == "utf-16-be":
self.__zeros_left_in_2_row = 0
self.__endian = BIG_ENDIAN
elif encoding == 'utf-32-le':
elif encoding == "utf-32-le":
self.__zeros_left_in_2_row = 3
self.__endian = LITTLE_ENDIAN
elif encoding == 'utf-16-le':
elif encoding == "utf-16-le":
self.__zeros_left_in_2_row = 1
self.__endian = LITTLE_ENDIAN
else:
@ -74,8 +76,9 @@ class CSVMemoryMapIterator(compact.Iterator):
def __next__(self):
line = self.__mmap_obj.readline()
if self.__count == 0:
utf_16_32 = (self.__encoding == 'utf-16' or
self.__encoding == 'utf-32')
utf_16_32 = (
self.__encoding == "utf-16" or self.__encoding == "utf-32"
)
if utf_16_32:
bom_header = line[:2]
if bom_header == BOM_BIG_ENDIAN:
@ -86,11 +89,12 @@ class CSVMemoryMapIterator(compact.Iterator):
line = line.rstrip()
line = line.decode(self.__encoding)
self.__count += 1
if line == '':
if line == "":
raise StopIteration
if compact.PY2:
# python 2 requires utf-8 encoded string for reading
line = line.encode('utf-8')
line = line.encode("utf-8")
return line
@ -98,6 +102,7 @@ class UTF8Recorder(compact.Iterator):
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8.
"""
def __init__(self, file_handle, encoding):
self.__file_handle = file_handle
self.reader = codecs.getreader(encoding)(file_handle)
@ -110,16 +115,23 @@ class UTF8Recorder(compact.Iterator):
def __next__(self):
    """Return the next item of ``self.reader`` encoded as utf-8."""
    # python 2 requires utf-8 encoded string for reading
    return next(self.reader).encode("utf-8")
class CSVSheetReader(SheetReader):
""" generic csv file reader"""
def __init__(self, sheet, encoding="utf-8",
auto_detect_float=True, ignore_infinity=True,
auto_detect_int=True, auto_detect_datetime=True,
**keywords):
def __init__(
self,
sheet,
encoding="utf-8",
auto_detect_float=True,
ignore_infinity=True,
auto_detect_int=True,
auto_detect_datetime=True,
**keywords
):
SheetReader.__init__(self, sheet, **keywords)
self._encoding = encoding
self.__auto_detect_int = auto_detect_int
@ -139,8 +151,8 @@ class CSVSheetReader(SheetReader):
def column_iterator(self, row):
    """Yield the cells of ``row`` one by one.

    Cells that are neither None nor the empty string are passed through
    ``self.__convert_cell`` first; the others are yielded as they are.
    """
    for cell in row:
        if compact.PY2:
            # on python 2 each cell is decoded from utf-8 first
            cell = cell.decode("utf-8")
        if cell is not None and cell != "":
            cell = self.__convert_cell(cell)
        yield cell
@ -151,8 +163,8 @@ class CSVSheetReader(SheetReader):
if ret is None and self.__auto_detect_float:
ret = service.detect_float_value(csv_cell_text)
shall_we_ignore_the_conversion = (
(ret in [float('inf'), float('-inf')]) and
self.__ignore_infinity
(ret in [float("inf"), float("-inf")])
and self.__ignore_infinity
)
if shall_we_ignore_the_conversion:
ret = None
@ -165,31 +177,37 @@ class CSVSheetReader(SheetReader):
def close(self):
    """Close the file handle when one is set."""
    if self.__file_handle:
        self.__file_handle.close()
    # else: means the generator has been run
    # yes, no run, no file open.
class CSVFileReader(CSVSheetReader):
    """ read csv from physical file """

    def get_file_handle(self):
        """Open ``self._native_sheet.payload`` and return a line source.

        On python 2 the file is opened in binary mode and wrapped in a
        ``UTF8Recorder``; otherwise it is opened in text mode with
        ``self._encoding``.
        """
        if compact.PY2:
            file_handle = open(self._native_sheet.payload, "rb")
            return UTF8Recorder(file_handle, self._encoding)
        return open(
            self._native_sheet.payload, "r", encoding=self._encoding
        )
class CSVinMemoryReader(CSVSheetReader):
""" read csv file from memory """
def get_file_handle(self):
unicode_reader = None
if compact.PY2:
if hasattr(self._native_sheet.payload, 'read'):
unicode_reader = UTF8Recorder(self._native_sheet.payload,
self._encoding)
if hasattr(self._native_sheet.payload, "read"):
unicode_reader = UTF8Recorder(
self._native_sheet.payload, self._encoding
)
else:
unicode_reader = self._native_sheet.payload
else:
@ -200,7 +218,8 @@ class CSVinMemoryReader(CSVSheetReader):
# comes at a cost.
content = self._native_sheet.payload.read()
unicode_reader = compact.StringIO(
content.decode(self._encoding))
content.decode(self._encoding)
)
else:
unicode_reader = self._native_sheet.payload
@ -209,6 +228,7 @@ class CSVinMemoryReader(CSVSheetReader):
class CSVBookReader(BookReader):
""" read csv file """
def __init__(self):
BookReader.__init__(self)
self._file_type = constants.FILE_FORMAT_CSV
@ -232,12 +252,14 @@ class CSVBookReader(BookReader):
def open_content(self, file_content, **keywords):
try:
import mmap
encoding = keywords.get('encoding', 'utf-8')
encoding = keywords.get("encoding", "utf-8")
if isinstance(file_content, mmap.mmap):
# load from mmap
self.__multiple_sheets = keywords.get('multiple_sheets', False)
self.__multiple_sheets = keywords.get("multiple_sheets", False)
self._file_stream = CSVMemoryMapIterator(
file_content, encoding)
file_content, encoding
)
self._keywords = keywords
self._native_book = self._load_from_stream()
else:
@ -245,12 +267,10 @@ class CSVBookReader(BookReader):
if isinstance(file_content, bytes):
file_content = file_content.decode(encoding)
# else python 2.7 does not care about bytes nor str
BookReader.open_content(
self, file_content, **keywords)
BookReader.open_content(self, file_content, **keywords)
except ImportError:
# python 2.6 or Google app engine
BookReader.open_content(
self, file_content, **keywords)
BookReader.open_content(self, file_content, **keywords)
def read_sheet(self, native_sheet):
if self.__load_from_memory_flag:
@ -272,8 +292,8 @@ class CSVBookReader(BookReader):
"""
self.__load_from_memory_flag = True
self.__line_terminator = self._keywords.get(
constants.KEYWORD_LINE_TERMINATOR,
self.__line_terminator)
constants.KEYWORD_LINE_TERMINATOR, self.__line_terminator
)
separator = DEFAULT_SHEET_SEPARATOR_FORMATTER % self.__line_terminator
if self.__multiple_sheets:
# will be slow for large files
@ -282,17 +302,20 @@ class CSVBookReader(BookReader):
sheets = content.split(separator)
named_contents = []
for sheet in sheets:
if sheet == '': # skip empty named sheet
if sheet == "": # skip empty named sheet
continue
lines = sheet.split(self.__line_terminator)
result = re.match(constants.SEPARATOR_MATCHER, lines[0])
new_content = '\n'.join(lines[1:])
new_sheet = NamedContent(result.group(1),
compact.StringIO(new_content))
new_content = "\n".join(lines[1:])
new_sheet = NamedContent(
result.group(1), compact.StringIO(new_content)
)
named_contents.append(new_sheet)
return named_contents
else:
if hasattr(self._file_stream, 'seek'):
if hasattr(self._file_stream, "seek"):
self._file_stream.seek(0)
return [NamedContent(self._file_type, self._file_stream)]
@ -303,30 +326,34 @@ class CSVBookReader(BookReader):
:returns: a book
"""
self.__line_terminator = self._keywords.get(
constants.KEYWORD_LINE_TERMINATOR,
self.__line_terminator)
names = self._file_name.split('.')
constants.KEYWORD_LINE_TERMINATOR, self.__line_terminator
)
names = self._file_name.split(".")
filepattern = "%s%s*%s*.%s" % (
names[0],
constants.DEFAULT_MULTI_CSV_SEPARATOR,
constants.DEFAULT_MULTI_CSV_SEPARATOR,
names[1])
names[1],
)
filelist = glob.glob(filepattern)
if len(filelist) == 0:
file_parts = os.path.split(self._file_name)
return [NamedContent(file_parts[-1], self._file_name)]
else:
matcher = "%s%s(.*)%s(.*).%s" % (
names[0],
constants.DEFAULT_MULTI_CSV_SEPARATOR,
constants.DEFAULT_MULTI_CSV_SEPARATOR,
names[1])
names[1],
)
tmp_file_list = []
for filen in filelist:
result = re.match(matcher, filen)
tmp_file_list.append((result.group(1), result.group(2), filen))
ret = []
for lsheetname, index, filen in sorted(tmp_file_list,
key=lambda row: row[1]):
for lsheetname, index, filen in sorted(
tmp_file_list, key=lambda row: row[1]
):
ret.append(NamedContent(lsheetname, filen))
return ret

View File

@ -13,10 +13,7 @@ from pyexcel_io._compact import StringIO, PY2
from pyexcel_io.book import BookReader
from pyexcel_io.constants import FILE_FORMAT_CSVZ
from .csvr import (
CSVinMemoryReader,
NamedContent
)
from .csvr import CSVinMemoryReader, NamedContent
class CSVZipBookReader(BookReader):
@ -25,6 +22,7 @@ class CSVZipBookReader(BookReader):
Read zipped csv file that was zipped up by pyexcel-io. It support
single csv file and multiple csv files.
"""
def __init__(self):
BookReader.__init__(self)
self._file_type = FILE_FORMAT_CSVZ
@ -37,21 +35,18 @@ class CSVZipBookReader(BookReader):
def open_stream(self, file_stream, **keywords):
    """Open a zipped csv book from a stream.

    ``BookReader.open_stream`` is called first; the sheet list is then
    built from ``self._file_stream`` and stored in ``self._native_book``.
    """
    BookReader.open_stream(self, file_stream, **keywords)
    archive_stream = self._file_stream
    self._native_book = self._load_from_file_alike_object(archive_stream)
def read_sheet(self, native_sheet):
    """Read one archive member and return its parsed content.

    :param native_sheet: object whose ``payload`` is the member name
        inside the zip archive and whose ``name`` is the sheet name
    :returns: the result of ``CSVinMemoryReader.to_array()``
    """
    raw = self.zipfile.read(native_sheet.payload)
    # On Python 2 the raw content is used as is; otherwise it is decoded
    # as utf-8 before being wrapped in a text stream.
    text = raw if PY2 else raw.decode("utf-8")
    named_sheet = NamedContent(native_sheet.name, StringIO(text))
    return CSVinMemoryReader(named_sheet, **self._keywords).to_array()
@ -61,10 +56,13 @@ class CSVZipBookReader(BookReader):
def _load_from_file_alike_object(self, file_alike_object):
    """Open the zip archive and list its members as named sheets.

    The opened archive is kept on ``self.zipfile``.

    :param file_alike_object: whatever ``zipfile.ZipFile`` accepts
    :returns: a list of ``NamedContent``, one per archive member
    :raises zipfile.BadZipfile: re-raised after printing a hint
    """
    try:
        self.zipfile = zipfile.ZipFile(file_alike_object, "r")
        sheets = []
        for member in self.zipfile.namelist():
            sheets.append(NamedContent(_get_sheet_name(member), member))
        return sheets

    except zipfile.BadZipfile:
        print("StringIO instance was passed by any chance?")
        raise

View File

@ -13,14 +13,15 @@ from .csvr import CSVBookReader
class TSVBookReader(CSVBookReader):
    """Reader for tab separated values, built on the csv book reader."""

    def __init__(self):
        CSVBookReader.__init__(self)
        self._file_type = constants.FILE_FORMAT_TSV

    def open(self, file_name, **keywords):
        """Force the tsv dialect, then open ``file_name`` as csv."""
        keywords.update(dialect=constants.KEYWORD_TSV_DIALECT)
        CSVBookReader.open(self, file_name, **keywords)

    def open_stream(self, file_content, **keywords):
        """Force the tsv dialect, then open the stream as csv."""
        keywords.update(dialect=constants.KEYWORD_TSV_DIALECT)
        CSVBookReader.open_stream(self, file_content, **keywords)

View File

@ -7,10 +7,7 @@
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.constants import (
FILE_FORMAT_TSVZ,
KEYWORD_TSV_DIALECT
)
from pyexcel_io.constants import FILE_FORMAT_TSVZ, KEYWORD_TSV_DIALECT
from .csvz import CSVZipBookReader
@ -20,14 +17,15 @@ class TSVZipBookReader(CSVZipBookReader):
it supports single tsv file and mulitple tsv files
"""
def __init__(self):
    """Initialise as a zipped csv reader, then mark the type as tsvz."""
    CSVZipBookReader.__init__(self)
    self._file_type = FILE_FORMAT_TSVZ
def open(self, file_name, **keywords):
    """Force the tsv dialect, then open ``file_name`` as a csv zip."""
    keywords.update(dialect=KEYWORD_TSV_DIALECT)
    CSVZipBookReader.open(self, file_name, **keywords)
def open_stream(self, file_content, **keywords):
    """Force the tsv dialect, then open the stream as a csv zip."""
    keywords.update(dialect=KEYWORD_TSV_DIALECT)
    CSVZipBookReader.open_stream(self, file_content, **keywords)

View File

@ -26,18 +26,14 @@ def detect_date_value(cell_text):
ret = None
try:
if len(cell_text) == 10:
ret = datetime.datetime.strptime(
cell_text,
"%Y-%m-%d")
ret = datetime.datetime.strptime(cell_text, "%Y-%m-%d")
ret = ret.date()
elif len(cell_text) == 19:
ret = datetime.datetime.strptime(
cell_text,
"%Y-%m-%d %H:%M:%S")
ret = datetime.datetime.strptime(cell_text, "%Y-%m-%d %H:%M:%S")
elif len(cell_text) > 19:
ret = datetime.datetime.strptime(
cell_text[0:26],
"%Y-%m-%d %H:%M:%S.%f")
cell_text[0:26], "%Y-%m-%d %H:%M:%S.%f"
)
except ValueError:
pass
return ret
@ -45,28 +41,34 @@ def detect_date_value(cell_text):
def detect_float_value(cell_text):
    """Return ``cell_text`` as a float, or None when it should stay text.

    Text that starts with "0" but not with "0." (e.g. 014325) is left
    unconverted, as is anything ``float()`` rejects.
    """
    has_leading_zero = cell_text.startswith("0")
    if has_leading_zero and not cell_text.startswith("0."):
        # do not convert if a number starts with 0
        return None

    try:
        return float(cell_text)
    except ValueError:
        return None
def detect_int_value(cell_text):
    """Return ``cell_text`` as an int, or None when it is not one.

    Multi-character text starting with "0" is never converted. Digit
    groups separated by commas (e.g. 1,000) are accepted once the commas
    are removed.
    """
    if cell_text.startswith("0") and len(cell_text) > 1:
        return None

    try:
        converted = int(cell_text)
    except ValueError:
        # plain int() failed; fall back to comma separated digit groups
        if re.match("([0-9]+,)*[0-9]+$", cell_text) is None:
            converted = None
        else:
            converted = int(cell_text.replace(",", ""))
    return converted
@ -83,29 +85,27 @@ def date_value(value):
try:
# catch strptime exceptions only
if len(value) == 10:
ret = datetime.datetime.strptime(
value,
"%Y-%m-%d")
ret = datetime.datetime.strptime(value, "%Y-%m-%d")
ret = ret.date()
elif len(value) == 19:
ret = datetime.datetime.strptime(
value,
"%Y-%m-%dT%H:%M:%S")
ret = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")
elif len(value) > 19:
ret = datetime.datetime.strptime(
value[0:26],
"%Y-%m-%dT%H:%M:%S.%f")
value[0:26], "%Y-%m-%dT%H:%M:%S.%f"
)
except ValueError:
pass
if ret == "invalid":
raise Exception("Bad date value %s" % value)
return ret
def time_value(value):
    """Convert a ``PT<h>H<m>M<s>S`` duration text into a time value.

    :param value: text such as ``PT01H02M03S``; only the beginning of the
        text has to match the pattern
    :returns: ``datetime.time`` when the hour part is below 24,
        ``datetime.timedelta`` when it is 24 or more, or None when the
        text does not match
    """
    import re

    # raw string: "\d" in a normal string literal is an invalid escape
    results = re.match(r"PT(\d+)H(\d+)M(\d+)S", value)
    if results and len(results.groups()) == 3:
        hour = int(results.group(1))
        minute = int(results.group(2))
        second = int(results.group(3))
        if hour < 24:
            ret = datetime.time(hour, minute, second)
        else:
            # datetime.time cannot hold 24 hours or more
            ret = datetime.timedelta(
                hours=hour, minutes=minute, seconds=second
            )
    else:
        ret = None
    return ret
@ -137,10 +137,10 @@ ODS_FORMAT_CONVERSION = {
"float": float,
"date": datetime.date,
"time": datetime.time,
'timedelta': datetime.timedelta,
"timedelta": datetime.timedelta,
"boolean": bool,
"percentage": float,
"currency": float
"currency": float,
}
@ -151,7 +151,7 @@ ODS_WRITE_FORMAT_COVERSION = {
datetime.date: "date",
datetime.time: "time",
datetime.timedelta: "timedelta",
bool: "boolean"
bool: "boolean",
}
if PY2:
@ -163,7 +163,7 @@ VALUE_CONVERTERS = {
"time": time_value,
"timedelta": time_value,
"boolean": boolean_value,
"percentage": float_value
"percentage": float_value,
}
@ -179,6 +179,7 @@ def ods_bool_value(value):
"""convert a boolean value to text"""
if value is True:
return "true"
else:
return "false"
@ -195,7 +196,7 @@ ODS_VALUE_CONVERTERS = {
"date": ods_date_value,
"time": ods_time_value,
"boolean": ods_bool_value,
"timedelta": ods_timedelta_value
"timedelta": ods_timedelta_value,
}
@ -206,5 +207,5 @@ VALUE_TOKEN = {
"boolean": "boolean-value",
"percentage": "value",
"currency": "value",
"timedelta": "time-value"
"timedelta": "time-value",
}

View File

@ -16,6 +16,7 @@ class NamedContent(object):
"""
Helper class for content that does not have a name
"""
def __init__(self, name, payload):
self.name = name
self.payload = payload
@ -25,12 +26,20 @@ class SheetReader(object):
"""
Generic sheet reader
"""
def __init__(self, sheet,
start_row=0, row_limit=-1,
start_column=0, column_limit=-1,
skip_row_func=None, skip_column_func=None,
skip_empty_rows=False, row_renderer=None,
**keywords):
def __init__(
self,
sheet,
start_row=0,
row_limit=-1,
start_column=0,
column_limit=-1,
skip_row_func=None,
skip_column_func=None,
skip_empty_rows=False,
row_renderer=None,
**keywords
):
self._native_sheet = sheet
self._keywords = {}
self._keywords.update(keywords)
@ -53,9 +62,11 @@ class SheetReader(object):
"""
for row_index, row in enumerate(self.row_iterator()):
row_position = self._skip_row(
row_index, self._start_row, self._row_limit)
row_index, self._start_row, self._row_limit
)
if row_position == constants.SKIP_DATA:
continue
elif row_position == constants.STOP_ITERATION:
break
@ -63,16 +74,19 @@ class SheetReader(object):
tmp_row = []
for column_index, cell_value in enumerate(
self.column_iterator(row)):
self.column_iterator(row)
):
column_position = self._skip_column(
column_index, self._start_column, self._column_limit)
column_index, self._start_column, self._column_limit
)
if column_position == constants.SKIP_DATA:
continue
elif column_position == constants.STOP_ITERATION:
break
tmp_row.append(cell_value)
if cell_value is not None and cell_value != '':
if cell_value is not None and cell_value != "":
return_row += tmp_row
tmp_row = []
if self._skip_empty_rows and len(return_row) < 1:

View File

@ -10,12 +10,12 @@
import pyexcel_io.constants as constants
XLS_PLUGIN = 'pyexcel-xls'
XLSX_PLUGIN = 'pyexcel-xlsx'
ODS_PLUGIN = 'pyexcel-ods'
ODS3_PLUGIN = 'pyexcel-ods3'
XLSXW_PLUGIN = 'pyexcel-xlsxw'
IO_ITSELF = 'pyexcel-io'
XLS_PLUGIN = "pyexcel-xls"
XLSX_PLUGIN = "pyexcel-xlsx"
ODS_PLUGIN = "pyexcel-ods"
ODS3_PLUGIN = "pyexcel-ods3"
XLSXW_PLUGIN = "pyexcel-xlsxw"
IO_ITSELF = "pyexcel-io"
AVAILABLE_READERS = {
@ -26,7 +26,7 @@ AVAILABLE_READERS = {
constants.FILE_FORMAT_CSV: [IO_ITSELF],
constants.FILE_FORMAT_TSV: [IO_ITSELF],
constants.FILE_FORMAT_CSVZ: [IO_ITSELF],
constants.FILE_FORMAT_TSVZ: [IO_ITSELF]
constants.FILE_FORMAT_TSVZ: [IO_ITSELF],
}
AVAILABLE_WRITERS = {
@ -37,7 +37,7 @@ AVAILABLE_WRITERS = {
constants.FILE_FORMAT_CSV: [IO_ITSELF],
constants.FILE_FORMAT_TSV: [IO_ITSELF],
constants.FILE_FORMAT_CSVZ: [IO_ITSELF],
constants.FILE_FORMAT_TSVZ: [IO_ITSELF]
constants.FILE_FORMAT_TSVZ: [IO_ITSELF],
}
@ -55,16 +55,19 @@ def is_empty_array(array):
"""
Check if an array is an array of '' or not
"""
empty_array = [element for element in array if element != '']
empty_array = [element for element in array if element != ""]
return len(empty_array) == 0
def swap_empty_string_for_none(array):
    """Return a new list where every element equal to "" becomes None."""
    return [None if item == "" else item for item in array]

View File

@ -11,19 +11,19 @@ from pyexcel_io.plugins import IOPluginInfoChain
IOPluginInfoChain(__name__).add_a_writer(
relative_plugin_class_path='csvw.CSVBookWriter',
file_types=['csv'],
stream_type='text'
relative_plugin_class_path="csvw.CSVBookWriter",
file_types=["csv"],
stream_type="text",
).add_a_writer(
relative_plugin_class_path='tsv.TSVBookWriter',
file_types=['tsv'],
stream_type='text'
relative_plugin_class_path="tsv.TSVBookWriter",
file_types=["tsv"],
stream_type="text",
).add_a_writer(
relative_plugin_class_path='csvz.CSVZipBookWriter',
file_types=['csvz'],
stream_type='binary'
relative_plugin_class_path="csvz.CSVZipBookWriter",
file_types=["csvz"],
stream_type="binary",
).add_a_writer(
relative_plugin_class_path='tsvz.TSVZipBookWriter',
file_types=['tsvz'],
stream_type='binary'
relative_plugin_class_path="tsvz.TSVZipBookWriter",
file_types=["tsvz"],
stream_type="binary",
)

View File

@ -32,7 +32,8 @@ class UnicodeWriter(object):
def writerow(self, row):
""" write row into the csv file """
self.writer.writerow(
[compact.text_type(s).encode("utf-8") for s in row])
[compact.text_type(s).encode("utf-8") for s in row]
)
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
@ -54,24 +55,32 @@ class CSVSheetWriter(SheetWriter):
csv file writer
"""
def __init__(
    self,
    filename,
    name,
    encoding="utf-8",
    single_sheet_in_book=False,
    sheet_index=None,
    **keywords
):
    """Record the sheet settings, then initialise the base sheet writer.

    :param filename: passed to ``SheetWriter.__init__`` as its first
        argument
    :param name: sheet name; replaced by None for a single sheet book
    :param encoding: kept on ``self._encoding``
    :param single_sheet_in_book: kept on ``self._single_sheet_in_book``
    :param sheet_index: kept on ``self._sheet_index``
    :param keywords: forwarded to ``SheetWriter.__init__``; may carry a
        line terminator that overrides the default csv newline
    """
    self._encoding = encoding
    self._single_sheet_in_book = single_sheet_in_book
    self._sheet_name = None if single_sheet_in_book else name
    self._sheet_index = sheet_index
    self.__line_terminator = keywords.get(
        constants.KEYWORD_LINE_TERMINATOR, constants.DEFAULT_CSV_NEWLINE
    )
    self.writer = None
    self.file_handle = None
    sheet_name = self._sheet_name
    SheetWriter.__init__(self, filename, sheet_name, sheet_name, **keywords)
def write_row(self, array):
"""
@ -82,6 +91,7 @@ class CSVSheetWriter(SheetWriter):
class CSVFileWriter(CSVSheetWriter):
""" Write csv to a physical file """
def close(self):
self.file_handle.close()
@ -91,46 +101,62 @@ class CSVFileWriter(CSVSheetWriter):
file_name = "%s%s%s%s%s.%s" % (
names[0],
constants.DEFAULT_MULTI_CSV_SEPARATOR,
name, # sheet name
name, # sheet name
constants.DEFAULT_MULTI_CSV_SEPARATOR,
self._sheet_index, # sheet index
names[1])
names[1],
)
else:
file_name = self._native_book
if compact.PY2:
self.file_handle = open(file_name, "wb")
self.writer = UnicodeWriter(self.file_handle,
encoding=self._encoding,
**self._keywords)
self.writer = UnicodeWriter(
self.file_handle, encoding=self._encoding, **self._keywords
)
else:
self.file_handle = open(file_name, "w", newline="",
encoding=self._encoding)
self.file_handle = open(
file_name, "w", newline="", encoding=self._encoding
)
self.writer = csv.writer(self.file_handle, **self._keywords)
class CSVMemoryWriter(CSVSheetWriter):
""" Write csv to a memory stream """
def __init__(
    self,
    filename,
    name,
    encoding="utf-8",
    single_sheet_in_book=False,
    sheet_index=None,
    **keywords
):
    """Pass every argument unchanged to ``CSVSheetWriter.__init__``."""
    named_options = dict(
        encoding=encoding,
        single_sheet_in_book=single_sheet_in_book,
        sheet_index=sheet_index,
    )
    named_options.update(keywords)
    CSVSheetWriter.__init__(self, filename, name, **named_options)
def set_sheet_name(self, name):
    """Attach a csv writer to the stream held in ``self._native_book``.

    When the book holds more than one sheet, a header row built from
    ``self._sheet_name`` is written first.
    """
    self.file_handle = self._native_book
    if compact.PY2:
        self.writer = UnicodeWriter(
            self.file_handle, encoding=self._encoding, **self._keywords
        )
    else:
        self.writer = csv.writer(self.file_handle, **self._keywords)

    if self._single_sheet_in_book:
        return

    header = constants.DEFAULT_CSV_STREAM_FILE_FORMATTER % (
        self._sheet_name,
        "",
    )
    self.writer.writerow([header])
def close(self):
@ -139,12 +165,12 @@ class CSVMemoryWriter(CSVSheetWriter):
# because the io stream can be used later
pass
else:
self.writer.writerow(
[constants.SEPARATOR_FORMATTER % ""])
self.writer.writerow([constants.SEPARATOR_FORMATTER % ""])
class CSVBookWriter(BookWriter):
""" write csv with unicode support """
def __init__(self):
BookWriter.__init__(self)
self._file_type = constants.FILE_FORMAT_CSV
@ -160,6 +186,7 @@ class CSVBookWriter(BookWriter):
self._file_alike_object,
name,
sheet_index=self.__index,
**self._keywords)
**self._keywords
)
self.__index = self.__index + 1
return writer

View File

@ -18,21 +18,21 @@ from .csvw import CSVSheetWriter, UnicodeWriter
class CSVZipSheetWriter(CSVSheetWriter):
""" handle the zipfile interface """
def __init__(self, zipfile, sheetname, file_extension, **keywords):
    """Remember the file extension and initialise as a multi sheet writer.

    ``single_sheet_in_book`` is always forced to False before the
    keywords reach ``CSVSheetWriter.__init__``.
    """
    self.file_extension = file_extension
    keywords.update(single_sheet_in_book=False)
    CSVSheetWriter.__init__(self, zipfile, sheetname, **keywords)
def set_sheet_name(self, name):
    """Create an in-memory buffer and a csv writer bound to it."""
    self.content = StringIO()
    if not PY2:
        import csv

        self.writer = csv.writer(self.content, **self._keywords)
        return

    self.writer = UnicodeWriter(
        self.content, encoding=self._encoding, **self._keywords
    )
def close(self):
@ -50,6 +50,7 @@ class CSVZipBookWriter(BookWriter):
Pyexcel-io had the facility to unzip it for you or you could use
any other unzip software.
"""
def __init__(self):
BookWriter.__init__(self)
self._file_type = FILE_FORMAT_CSVZ
@ -57,17 +58,14 @@ class CSVZipBookWriter(BookWriter):
def open(self, file_name, **keywords):
    """Open the book, then create a deflated zip archive for writing."""
    BookWriter.open(self, file_name, **keywords)
    archive = zipfile.ZipFile(file_name, "w", zipfile.ZIP_DEFLATED)
    self.zipfile = archive
def create_sheet(self, name):
    """Return a zip sheet writer for ``name``.

    ``DEFAULT_SHEET_NAME`` is used when ``name`` is None. The first three
    characters of ``self._file_type`` are passed as the file extension.
    """
    sheet_name = DEFAULT_SHEET_NAME if name is None else name
    extension = self._file_type[:3]
    return CSVZipSheetWriter(
        self.zipfile, sheet_name, extension, **self._keywords
    )

View File

@ -13,10 +13,11 @@ from .csvw import CSVBookWriter
class TSVBookWriter(CSVBookWriter):
    """Writer for tab separated values, built on the csv book writer."""

    def __init__(self):
        CSVBookWriter.__init__(self)
        self._file_type = constants.FILE_FORMAT_TSV

    def open(self, file_name, **keywords):
        """Force the tsv dialect, then open ``file_name`` as csv."""
        keywords.update(dialect=constants.KEYWORD_TSV_DIALECT)
        CSVBookWriter.open(self, file_name, **keywords)

View File

@ -7,10 +7,7 @@
:copyright: (c) 2014-2017 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from pyexcel_io.constants import (
FILE_FORMAT_TSVZ,
KEYWORD_TSV_DIALECT
)
from pyexcel_io.constants import FILE_FORMAT_TSVZ, KEYWORD_TSV_DIALECT
from .csvz import CSVZipBookWriter
@ -20,10 +17,11 @@ class TSVZipBookWriter(CSVZipBookWriter):
It is similiar to CSVZipBookWriter, but support tab separated values
"""
def __init__(self):
    """Initialise as a zipped csv writer, then mark the type as tsvz."""
    CSVZipBookWriter.__init__(self)
    self._file_type = FILE_FORMAT_TSVZ
def open(self, file_name, **keywords):
    """Force the tsv dialect, then open ``file_name`` as a csv zip."""
    keywords.update(dialect=KEYWORD_TSV_DIALECT)
    CSVZipBookWriter.open(self, file_name, **keywords)