From a923bce3be7ee820aef89fa0a03dad06d1789107 Mon Sep 17 00:00:00 2001 From: jaska Date: Tue, 6 Oct 2020 22:13:56 +0100 Subject: [PATCH] Bits bulbs (#92) * :books: update doc strings and plugin compatibility list * :fire: remove PY2 related code and update docs * :microscope: more test coverage --- .moban.d/docs/source/index.rst.jj2 | 3 +- docs/source/api/pyexcel_io.get_data.rst | 4 +- docs/source/api/pyexcel_io.iget_data.rst | 6 +++ docs/source/api/pyexcel_io.save_data.rst | 4 +- docs/source/common_parameters.rst | 24 ++++++--- docs/source/extensions.rst | 65 ++++++++++++++---------- docs/source/index.rst | 3 +- docs/source/options.rst | 11 ---- docs/source/renderer.rst | 1 - pyexcel_io/_compact.py | 18 ++----- pyexcel_io/io.py | 15 ++++-- tests/test_io.py | 24 +++------ tests/test_new_csvz_book.py | 3 -- tests/test_service.py | 56 ++++++++++++++++++++ 14 files changed, 146 insertions(+), 91 deletions(-) create mode 100644 docs/source/api/pyexcel_io.iget_data.rst delete mode 100644 docs/source/options.rst diff --git a/.moban.d/docs/source/index.rst.jj2 b/.moban.d/docs/source/index.rst.jj2 index 49f216e..faf8d4d 100644 --- a/.moban.d/docs/source/index.rst.jj2 +++ b/.moban.d/docs/source/index.rst.jj2 @@ -57,6 +57,7 @@ get_data(.., library='pyexcel-ods') ============= ======= ======== ======= ======== ======== ======== `pyexcel-io`_ `xls`_ `xlsx`_ `ods`_ `ods3`_ `odsr`_ `xlsxw`_ ============= ======= ======== ======= ======== ======== ======== + 0.6.0+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ 0.5.10+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ 0.5.1+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x @@ -91,7 +92,6 @@ get_data(.., library='pyexcel-ods') csvz sqlalchemy django - options extensions @@ -108,6 +108,7 @@ API ..
autosummary:: :toctree: api/ + iget_data get_data save_data diff --git a/docs/source/api/pyexcel_io.get_data.rst b/docs/source/api/pyexcel_io.get_data.rst index 671a8d1..9f3c898 100644 --- a/docs/source/api/pyexcel_io.get_data.rst +++ b/docs/source/api/pyexcel_io.get_data.rst @@ -1,5 +1,5 @@ -pyexcel_io.get_data -=================== +pyexcel\_io.get\_data +===================== .. currentmodule:: pyexcel_io diff --git a/docs/source/api/pyexcel_io.iget_data.rst b/docs/source/api/pyexcel_io.iget_data.rst new file mode 100644 index 0000000..4b21126 --- /dev/null +++ b/docs/source/api/pyexcel_io.iget_data.rst @@ -0,0 +1,6 @@ +pyexcel\_io.iget\_data +====================== + +.. currentmodule:: pyexcel_io + +.. autofunction:: iget_data \ No newline at end of file diff --git a/docs/source/api/pyexcel_io.save_data.rst b/docs/source/api/pyexcel_io.save_data.rst index 791f311..fcece28 100644 --- a/docs/source/api/pyexcel_io.save_data.rst +++ b/docs/source/api/pyexcel_io.save_data.rst @@ -1,5 +1,5 @@ -pyexcel_io.save_data -==================== +pyexcel\_io.save\_data +====================== .. currentmodule:: pyexcel_io diff --git a/docs/source/common_parameters.rst b/docs/source/common_parameters.rst index 92926e5..3ec67af 100644 --- a/docs/source/common_parameters.rst +++ b/docs/source/common_parameters.rst @@ -2,9 +2,26 @@ Common parameters ================================================================================ +'library' option is added +-------------------------------------------------------------------------------- + +In order to have overlapping plugins co-exist, 'library' option is added to +get_data and save_data. + + +get_data only parameters +------------------------------- + +keep_trailing_empty_cells +******************************************************************************** + +default: False + +If turned on, the return data will contain trailing empty cells.
+ auto_dectect_datetime --------------------------------------------------------------------------------- +******************************************************************************** The datetime formats are: @@ -14,11 +31,6 @@ The datetime formats are: Any other datetime formats will be thrown as ValueError -'library' option is added --------------------------------------------------------------------------------- - -In order to have overlapping plugins co-exit, 'library' option is added to -get_data and save_data. csv only parameters -------------------------------------------------------------------------------- diff --git a/docs/source/extensions.rst b/docs/source/extensions.rst index 46eb2d9..6d55018 100644 --- a/docs/source/extensions.rst +++ b/docs/source/extensions.rst @@ -1,8 +1,12 @@ Extend pyexcel-io Tutorial ================================================================================ -You are welcome extend pyexcel-io to read and write more tabular formats. In -github repo, you will find two examples in `examples` folder. This section +You are welcome to extend pyexcel-io to read and write more tabular formats. +No. 1 rule, your plugin must have a prefix 'pyexcel_' in its module path. +For example, `pyexcel-xls` has 'pyexcel_xls' as its module path. Otherwise, +pyexcel-io will not load your plugin. + +On github, you will find two examples in `examples` folder. This section explains its implementations to help you write yours. .. note:: @@ -10,7 +14,7 @@ explains its implementations to help you write yours. No longer, you will need to do explicit imports for pyexcel-io extensions. Instead, you install them and manage them via pip. -Reader +Simple Reader for a yaml file -------------------------------------------------------------------------------- Suppose we have a yaml file, containing a dictionary where the values are @@ -60,7 +64,7 @@ files on physical disk. "memory" means a file stream.
"content" means a string b :lines: 36-41 -**Test your reader ** +**Test your reader** Let's run the following code and see if it works. @@ -68,13 +72,21 @@ Let's run the following code and see if it works. :language: python :lines: 43-45 -Writer + +You would see these in standard output: + +.. code-block:: bash + + $ python custom_yaml_reader.py + OrderedDict([('sheet 1', [[1, 2, 3], [2, 3, 4]]), ('sheet 2', [['A', 'B', 'C']])]) + +A writer to write content in yaml -------------------------------------------------------------------------------- Now for the writer, let's write a pyexcel-io writer that write a dictionary of two dimentaional arrays back into a yaml file seen above. -** Implement IWriter ** +**Implement IWriter** Two abstract functions are required: @@ -85,7 +97,7 @@ Two abstract functions are required: :language: python :lines: 18-30 -** Implement ISheetWriter ** +**Implement ISheetWriter** It is imagined that you will have your own sheet writer. You simply need to figure out how to write a row. Row by row write action was already written by `ISheetWrier`. @@ -111,8 +123,25 @@ Let's run the following code and please examine `mytest.yaml` yourself. :language: python :lines: 40-46 +And you shall find a file named 'mytest.yaml': + + +.. 
code-block:: bash + + $ cat mytest.yaml + sheet 1: + - - 1 + - 3 + - 4 + - - 2 + - 4 + - 9 + sheet 2: + - - B + - C + - D + - Other pyexcel-io plugins ----------------------------------------------------------------------------- @@ -138,26 +167,6 @@ And you can also get the data back:: [[1, 2, 3]] -Work with memory file ------------------------------------------------------------------------------ - -Here is the sample code to work with memory file:: - - >>> from pyexcel_io.manager import get_io - >>> io = get_io("xls") - >>> data = [[1,2,3]] - >>> save_data(io, data, "xls") - -The difference is that you have mention file type if you use :meth:`pyexcel_io.save_data` - -And you can also get the data back:: - - >>> data = get_data(io, "xls") - >>> data['pyexcel_sheet1'] - [[1, 2, 3]] - -The same applies to :meth:`pyexcel_io.get_data`. - Other formats ----------------------------------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 2d47c3c..4b66b2c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -159,6 +159,7 @@ get_data(.., library='pyexcel-ods') ============= ======= ======== ======= ======== ======== ======== `pyexcel-io`_ `xls`_ `xlsx`_ `ods`_ `ods3`_ `odsr`_ `xlsxw`_ ============= ======= ======== ======= ======== ======== ======== + 0.6.0+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ 0.5.10+ 0.5.0+ 0.5.0+ 0.5.4 0.5.3 0.5.0+ 0.5.0+ 0.5.1+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.5.0+ 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x 0.4.x @@ -193,7 +194,6 @@ get_data(.., library='pyexcel-ods') csvz sqlalchemy django - options extensions @@ -210,6 +210,7 @@ API .. autosummary:: :toctree: api/ + iget_data get_data save_data diff --git a/docs/source/options.rst b/docs/source/options.rst deleted file mode 100644 index 9ef9db4..0000000 --- a/docs/source/options.rst +++ /dev/null @@ -1,11 +0,0 @@ -Options -====================== - -Here is the documentation on the keyword options for get_data. 
- -keep_trailing_empty_cells ------------------------------- - -default: False - -If turned on, the return data will contain trailing empty cells. diff --git a/docs/source/renderer.rst b/docs/source/renderer.rst index 24a6f0a..2b5ab4d 100644 --- a/docs/source/renderer.rst +++ b/docs/source/renderer.rst @@ -1,5 +1,4 @@ Rendering(Formatting) the data - ================================================================================ You might want to do custom rendering on your data obtained. `row_renderer` was diff --git a/pyexcel_io/_compact.py b/pyexcel_io/_compact.py index 6b691c4..90ebd3a 100644 --- a/pyexcel_io/_compact.py +++ b/pyexcel_io/_compact.py @@ -7,16 +7,9 @@ :copyright: (c) 2014-2020 by Onni Software Ltd. :license: New BSD License, see LICENSE for more details """ -# flake8: noqa -# pylint: disable=import-error -# pylint: disable=invalid-name -# pylint: disable=too-few-public-methods -# pylint: disable=ungrouped-imports -# pylint: disable=redefined-variable-type -import sys -import types import logging -from collections import OrderedDict +from io import BytesIO, StringIO # noqa: F401 +from collections import OrderedDict # noqa: F401 try: from logging import NullHandler @@ -27,8 +20,6 @@ except ImportError: pass -from io import BytesIO, StringIO - text_type = str irange = range @@ -48,7 +39,4 @@ def isstream(instance): def is_string(atype): """find out if a type is str or not""" - if atype == str: - return True - - return False + return atype == str diff --git a/pyexcel_io/io.py b/pyexcel_io/io.py index eba9315..1606f83 100644 --- a/pyexcel_io/io.py +++ b/pyexcel_io/io.py @@ -25,6 +25,10 @@ from pyexcel_io.exceptions import ( def iget_data(afile, file_type=None, **keywords): """Get data from an excel file source + The data has not gone into memory yet. If you use dedicated partial read + plugins, such as pyexcel-xlsxr, pyexcel-odsr, you will notice + the memory consumption drop when you work with big files. 
+ :param afile: a file name, a file stream or actual content :param sheet_name: the name of the sheet to be loaded :param sheet_index: the index of the sheet to be loaded @@ -32,9 +36,6 @@ def iget_data(afile, file_type=None, **keywords): :param file_type: used only when filename is not a physical file name :param force_file_type: used only when filename refers to a physical file and it is intended to open it as forced file type. - :param streaming: toggles the type of returned data. The values of the - returned dictionary remain as generator if it is set - to True. Default is False. :param library: explicitly name a library for use. e.g. library='pyexcel-ods' :param auto_detect_float: defaults to True @@ -44,6 +45,7 @@ def iget_data(afile, file_type=None, **keywords): :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored :param default_float_nan: choose one form of 'NaN', 'nan' :param pep_0515_off: turn off pep 0515. default to True. + :param keep_trailing_empty_cells: keep trailing columns. default to False :param keywords: any other library specific parameters :returns: an ordered dictionary """ @@ -59,7 +61,10 @@ def get_data(afile, file_type=None, streaming=None, **keywords): :param afile: a file name, a file stream or actual content :param sheet_name: the name of the sheet to be loaded :param sheet_index: the index of the sheet to be loaded + :param sheets: a list of sheet to be loaded :param file_type: used only when filename is not a physial file name + :param force_file_type: used only when filename refers to a physical file + and it is intended to open it as forced file type. :param streaming: toggles the type of returned data. The values of the returned dictionary remain as generator if it is set to True. Default is False. 
@@ -69,6 +74,10 @@ def get_data(afile, file_type=None, streaming=None, **keywords): :param auto_detect_int: defaults to True :param auto_detect_datetime: defaults to True :param ignore_infinity: defaults to True + :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored + :param default_float_nan: choose one form of 'NaN', 'nan' + :param pep_0515_off: turn off pep 0515. default to True. + :param keep_trailing_empty_cells: keep trailing columns. default to False :param keywords: any other library specific parameters :returns: an ordered dictionary """ diff --git a/tests/test_io.py b/tests/test_io.py index e0eec0f..cf14ca4 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,5 +1,4 @@ import os -import sys import types from zipfile import BadZipfile from unittest import TestCase @@ -12,8 +11,6 @@ from pyexcel_io._compact import BytesIO, StringIO, OrderedDict, is_string from nose.tools import eq_, raises -PY2 = sys.version_info[0] == 2 - @raises(IOError) def test_directory_name_as_file(): @@ -116,11 +113,8 @@ def test_load_unknown_data_from_memory(): @raises(BadZipfile) def test_load_csvz_data_from_memory(): - if not PY2: - io = StringIO() - get_data(io, file_type="csvz") - else: - raise BadZipfile("pass it") + io = StringIO() + get_data(io, file_type="csvz") @raises(IOError) @@ -130,12 +124,9 @@ def test_write_xlsx_data(): @raises(Exception) def test_writer_csvz_data_from_memory(): - if not PY2: - io = StringIO() - writer = get_writer(io, file_type="csvz") - writer.write({"adb": [[2, 3]]}) - else: - raise Exception("pass it") + io = StringIO() + writer = get_writer(io, file_type="csvz") + writer.write({"adb": [[2, 3]]}) @raises(exceptions.NoSupportingPluginFound) @@ -264,10 +255,7 @@ def test_conversion_from_bytes_to_text(): def test_is_string(): - if PY2: - assert is_string(type(u"a")) is True - else: - assert is_string(type("a")) is True + assert is_string(type("a")) is True def test_generator_is_obtained(): diff --git a/tests/test_new_csvz_book.py 
b/tests/test_new_csvz_book.py index 6f42b13..2d4c4ef 100644 --- a/tests/test_new_csvz_book.py +++ b/tests/test_new_csvz_book.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import os -import sys import zipfile from unittest import TestCase @@ -12,8 +11,6 @@ from pyexcel_io._compact import OrderedDict from nose.tools import raises -PY2 = sys.version_info[0] == 2 - class TestCSVZ(TestCase): file_type = "csvz" diff --git a/tests/test_service.py b/tests/test_service.py index 8d0af9a..7ded3d6 100644 --- a/tests/test_service.py +++ b/tests/test_service.py @@ -1,10 +1,17 @@ +from datetime import time, datetime, timedelta + from pyexcel_io.service import ( date_value, time_value, + boolean_value, + ods_bool_value, + ods_date_value, + ods_time_value, ods_float_value, throw_exception, detect_int_value, detect_float_value, + ods_timedelta_value, ) from pyexcel_io.exceptions import IntegerAccuracyLossError @@ -106,3 +113,52 @@ def test_big_int_value(): @raises(IntegerAccuracyLossError) def test_throw_exception(): throw_exception(1000000000000000) + + +def test_boolean_value(): + fixture = ["true", "false", 1] + expected = [True, False, 1] + + actual = [boolean_value(element) for element in fixture] + eq_(actual, expected) + + +def test_time_delta_presentation(): + a = datetime(2020, 12, 12, 12, 12, 12) + b = datetime(2020, 11, 12, 12, 12, 11) + delta = a - b + + value = ods_timedelta_value(delta) + eq_(value, "PT720H00M01S") + + +def test_ods_bool_to_string(): + fixture = [True, False] + expected = ["true", "false"] + + actual = [ods_bool_value(element) for element in fixture] + eq_(actual, expected) + + +def test_ods_time_value(): + test = datetime(2020, 10, 6, 11, 11, 11) + actual = ods_time_value(test) + eq_(actual, "PT11H11M11S") + + +def test_ods_date_value(): + test = datetime(2020, 10, 6, 11, 11, 11) + actual = ods_date_value(test) + eq_(actual, "2020-10-06") + + +def test_time_value_returns_time_delta(): + test_time_value = "PT720H00M01S" + delta = 
time_value(test_time_value) + eq_(delta, timedelta(days=30, seconds=1)) + + +def test_time_value(): + test_time_value = "PT23H00M01S" + delta = time_value(test_time_value) + eq_(delta, time(23, 0, 1))