Merge pull request #50 from pyexcel/dev

release 0.5.8
2018-08-16 19:31:22 +01:00 · 2018-08-16 19:31:22 +01:00 · 0d1151e79c
parent d2e1e8ad99 4f06d5ac3a
commit 0d1151e79c
15 changed files with 274 additions and 44 deletions
--- a/.moban.yml
+++ b/.moban.yml
@ -4,7 +4,7 @@ configuration:
  - "commons/templates"
  - "setupmobans/templates"
  - ".moban.d"
-  configuration: pyexcel_io.yaml
+  configuration: pyexcel-io.yml
 targets:
  - "docs/source/conf.py": "docs/source/conf.py"
  - setup.py: setup.py
--- a/.travis.yml
+++ b/.travis.yml
@ -4,6 +4,7 @@ notifications:
  email: false
 python:
  - pypy-5.3.1
+  - 3.7-dev
  - 3.6
  - 3.5
  - 3.4
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@ -1,6 +1,16 @@
 Change log
 ================================================================================

+0.5.8 - unreleased
+--------------------------------------------------------------------------------
+
+fixed
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. `#49 <https://github.com/pyexcel/pyexcel-io/issues/49>`_, support additional
+   options when detecting float values in csv format: default_float_nan,
+   ignore_nan_text
+
 0.5.7 - 02.05.2018
 --------------------------------------------------------------------------------

--- a/4
+++ b/4
@ -13,7 +13,7 @@ that the following conditions are met:
  and/or other materials provided with the distribution.

 * Neither the name of 'pyexcel-io' nor the names of the contributors
-  may not be used to endorse or promote products derived from this software
+  may be used to endorse or promote products derived from this software
  without specific prior written permission.

 THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND
@ -27,4 +27,4 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
+DAMAGE.
--- a/changelog.yml
+++ b/changelog.yml
@ -1,6 +1,12 @@
 name: pyexcel-io
 organisation: pyexcel
 releases:
+- changes:
+  - action: fixed
+    details:
+    - '`#49`, support additional options when detecting float values in csv format: default_float_nan, ignore_nan_text'
+  date: unreleased
+  version: 0.5.8
 - changes:
  - action: fixed
    details:
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -4,26 +4,189 @@ DESCRIPTION = (
    'format and to/from databases' +
    ''
 )
+# -*- coding: utf-8 -*-
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# This file does only contain a selection of the most common options. For a
+# full list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+
+project = u'pyexcel-io'
+copyright = u'2015-2018 Onni Software Ltd.'
+author = u'C.W.'
+
+# The short X.Y version
+version = u'0.5.8'
+# The full version, including alpha/beta/rc tags
+release = u'0.5.8'
+
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
 extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.viewcode',
-    'sphinx.ext.autosummary'
 ]
-intersphinx_mapping = {
-    'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None),
-}
+
+# Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
 source_suffix = '.rst'
+
+# The master toctree document.
 master_doc = 'index'

-project = u'pyexcel-io'
-copyright = u'2015-2018 Onni Software Ltd.'
-version = '0.5.7'
-release = '0.5.7'
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = 'en'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = []
+
+# The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# The default sidebars (for documents that don't match any pattern) are
+# defined by theme itself.  Builtin themes are using these templates by
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
+# 'searchbox.html']``.
+#
+# html_sidebars = {}
+
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pyexcel-iodoc'
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'pyexcel-io.tex', u'pyexcel-io Documentation',
+     u'Onni Software Ltd.', 'manual'),
+]
+
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'pyexcel-io', u'pyexcel-io Documentation',
+     [author], 1)
+]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'pyexcel-io', u'pyexcel-io Documentation',
+     author, 'pyexcel-io', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+
+# -- Options for Epub output -------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = project
+
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+
+# A unique identification for the text.
+#
+# epub_uid = ''
+
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html']
+
+# -- Extension configuration -------------------------------------------------
+# -- Options for intersphinx extension ---------------------------------------
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'https://docs.python.org/': None}
+# TODO: html_theme not configurable upstream
 html_theme = 'default'


@ -31,19 +194,8 @@ def setup(app):
    app.add_stylesheet('theme_overrides.css')


-html_static_path = ['_static']
-htmlhelp_basename = 'pyexcel-iodoc'
-latex_elements = {}
-latex_documents = [
-    ('index', 'pyexcel-io.tex',
-     'pyexcel-io Documentation',
-     'Onni Software Ltd.', 'manual'),
-]
-man_pages = [
-    ('index', 'pyexcel-io',
-     'pyexcel-io Documentation',
-     [u'Onni Software Ltd.'], 1)
-]
+
+# TODO: DESCRIPTION not configurable upstream
 texinfo_documents = [
    ('index', 'pyexcel-io',
     'pyexcel-io Documentation',
@ -51,3 +203,6 @@ texinfo_documents = [
     DESCRIPTION,
     'Miscellaneous'),
 ]
+intersphinx_mapping.update({
+    'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None),
+})
--- a/pyexcel_io.yaml
+++ b/pyexcel_io.yaml
@ -1,9 +1,9 @@
 overrides: "pyexcel.yaml"
 name: "pyexcel-io"
 nick_name: io
-version: 0.5.7
-current_version: 0.5.7
-release: 0.5.7
+version: 0.5.8
+current_version: 0.5.8
+release: 0.5.8
 dependencies:
  - ordereddict;python_version<"2.7"
  - lml==0.0.1
--- a/pyexcel_io/io.py
+++ b/pyexcel_io/io.py
@ -32,6 +32,9 @@ def iget_data(afile, file_type=None, **keywords):
    :param auto_detect_int: defaults to True
    :param auto_detect_datetime: defaults to True
    :param ignore_infinity: defaults to True
+    :param ignore_nan_text: keep any casing of 'nan' as text. defaults to False
+    :param default_float_nan: the exact 'nan' spelling to read as float nan
+    :param pep_0515_off: turn off pep 0515. defaults to True.
    :param keywords: any other library specific parameters
    :returns: an ordered dictionary
    """
--- a/pyexcel_io/readers/csvr.py
+++ b/pyexcel_io/readers/csvr.py
@ -131,6 +131,8 @@ class CSVSheetReader(SheetReader):
        auto_detect_int=True,
        auto_detect_datetime=True,
        pep_0515_off=True,
+        ignore_nan_text=False,
+        default_float_nan=None,
        **keywords
    ):
        SheetReader.__init__(self, sheet, **keywords)
@ -141,6 +143,8 @@ class CSVSheetReader(SheetReader):
        self.__auto_detect_datetime = auto_detect_datetime
        self.__file_handle = None
        self.__pep_0515_off = pep_0515_off
+        self.__ignore_nan_text = ignore_nan_text
+        self.__default_float_nan = default_float_nan

    def get_file_handle(self):
        """ return the unicode reader for csv """
@ -164,7 +168,9 @@ class CSVSheetReader(SheetReader):
            ret = service.detect_int_value(csv_cell_text, self.__pep_0515_off)
        if ret is None and self.__auto_detect_float:
            ret = service.detect_float_value(
-                csv_cell_text, self.__pep_0515_off
+                csv_cell_text, self.__pep_0515_off,
+                ignore_nan_text=self.__ignore_nan_text,
+                default_float_nan=self.__default_float_nan
            )
            shall_we_ignore_the_conversion = (
                (ret in [float("inf"), float("-inf")])
--- a/pyexcel_io/service.py
+++ b/pyexcel_io/service.py
@ -39,7 +39,10 @@ def detect_date_value(cell_text):
    return ret


-def detect_float_value(cell_text, pep_0515_off=True):
+def detect_float_value(
+        cell_text, pep_0515_off=True,
+        ignore_nan_text=False,
+        default_float_nan=None):
    should_we_skip_it = (
        cell_text.startswith("0") and cell_text.startswith("0.") is False
    )
@ -54,7 +57,19 @@ def detect_float_value(cell_text, pep_0515_off=True):
            return None

    try:
-        return float(cell_text)
+        if ignore_nan_text:
+            if cell_text.lower() == "nan":
+                return None
+            else:
+                return float(cell_text)
+        else:
+            if cell_text.lower() == "nan":
+                if cell_text == default_float_nan:
+                    return float("NaN")
+                else:
+                    return None
+            else:
+                return float(cell_text)
    except ValueError:
        return None

--- a/setup.py
+++ b/setup.py
@ -1,4 +1,6 @@
-# Template by setupmobans
+#!/usr/bin/env python3
+
+# Template by pypi-mobans
 import os
 import sys
 import codecs
@ -9,7 +11,7 @@ PY26 = PY2 and sys.version_info[1] < 7

 NAME = 'pyexcel-io'
 AUTHOR = 'C.W.'
-VERSION = '0.5.7'
+VERSION = '0.5.8'
 EMAIL = 'wangc_2011@hotmail.com'
 LICENSE = 'New BSD'
 DESCRIPTION = (
@ -17,7 +19,7 @@ DESCRIPTION = (
    'format and to/from databases'
 )
 URL = 'https://github.com/pyexcel/pyexcel-io'
-DOWNLOAD_URL = '%s/archive/0.5.7.tar.gz' % URL
+DOWNLOAD_URL = '%s/archive/0.5.8.tar.gz' % URL
 FILES = ['README.rst', 'CHANGELOG.rst']
 KEYWORDS = [
    'API',
@ -27,12 +29,10 @@ KEYWORDS = [
    'csvz',
    'django',
    'sqlalchemy',
-    'python'
+    'python',
 ]

 CLASSIFIERS = [
-    'Topic :: Office/Business',
-    'Topic :: Utilities',
    'Topic :: Software Development :: Libraries',
    'Programming Language :: Python',
    'Intended Audience :: Developers',
@ -63,8 +63,8 @@ EXTRAS_REQUIRE = {
 # You do not need to read beyond this line
 PUBLISH_COMMAND = '{0} setup.py sdist bdist_wheel upload -r pypi'.format(
    sys.executable)
-GS_COMMAND = ('gs pyexcel-io v0.5.7 ' +
-              "Find 0.5.7 in changelog for more details")
+GS_COMMAND = ('gs pyexcel-io v0.5.8 ' +
+              "Find 0.5.8 in changelog for more details")
 NO_GS_MESSAGE = ('Automatic github release is disabled. ' +
                 'Please install gease to enable it.')
 UPLOAD_FAILED_MSG = (
@ -140,7 +140,8 @@ def read_files(*files):

 def read(afile):
    """Read a file into setup"""
-    with codecs.open(afile, 'r', 'utf-8') as opened_file:
+    the_relative_file = os.path.join(HERE, afile)
+    with codecs.open(the_relative_file, 'r', 'utf-8') as opened_file:
        content = filter_out_test_code(opened_file)
        content = "".join(list(content))
        return content
--- a/test.bat
+++ b/test.bat
@ -1,2 +1,2 @@
 pip freeze
-nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst tests docs/source pyexcel_io && flake8 . --exclude=.moban.d --builtins=unicode,xrange,long
+nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long
--- a/test.sh
+++ b/test.sh
@ -1,2 +1,2 @@
 pip freeze
-nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst tests docs/source pyexcel_io && flake8 . --exclude=.moban.d --builtins=unicode,xrange,long
+nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long
--- a/tests/test_pyexcel_integration.py
+++ b/tests/test_pyexcel_integration.py
@ -4,6 +4,7 @@ from unittest import TestCase
 from textwrap import dedent

 import pyexcel as pe
+from pyexcel_io._compact import text_type


 class TestDateFormat(TestCase):
@ -125,13 +126,30 @@ class TestSpecialStrings(TestCase):
    """

    def setUp(self):
-        self.content = [['01', 1, 2.0, 3.1]]
+        self.content = [['01', 1, 2.0, 3.1, 'NaN', 'nan']]
        self.test_file = "test_auto_detect_init.csv"
        pe.save_as(array=self.content, dest_file_name=self.test_file)

-    def test_auto_detect_float_false(self):
+    def test_auto_detect_float_true(self):
        sheet = pe.get_sheet(file_name=self.test_file)
-        self.assertEqual(sheet.to_array(), [['01', 1, 2, 3.1]])
+        self.assertEqual(sheet.to_array(),
+                         [['01', 1, 2, 3.1, 'NaN', 'nan']])
+
+    def test_auto_detect_float_false(self):
+        sheet = pe.get_sheet(file_name=self.test_file, auto_detect_float=False)
+        self.assertEqual(sheet.to_array(),
+                         [['01', 1, '2.0', '3.1', 'NaN', 'nan']])
+
+    def test_auto_detect_float_ignore_nan_text(self):
+        sheet = pe.get_sheet(file_name=self.test_file, ignore_nan_text=True)
+        self.assertEqual(sheet.to_array(),
+                         [['01', 1, 2.0, 3.1, 'NaN', 'nan']])
+
+    def test_auto_detect_float_default_float_nan(self):
+        sheet = pe.get_sheet(file_name=self.test_file, default_float_nan="nan")
+        result = sheet.to_array()
+        assert isinstance(result[0][5], float)
+        assert isinstance(result[0][4], text_type)

    def tearDown(self):
        os.unlink(self.test_file)
--- a/tests/test_service.py
+++ b/tests/test_service.py
@ -74,3 +74,18 @@ def test_suppression_of_pep_0515_float():
    eq_(result, None)
    result = detect_float_value('123_123.1')
    eq_(result, None)
+
+
+def test_detect_float_value_on_nan():
+    result = detect_float_value('NaN', ignore_nan_text=True)
+    eq_(result, None)
+
+
+def test_detect_float_value_on_custom_nan_text():
+    result = detect_float_value('NaN', default_float_nan="nan")
+    eq_(result, None)
+
+
+def test_detect_float_value_on_custom_nan_text2():
+    result = detect_float_value('nan', default_float_nan="nan")
+    eq_(str(result), "nan")