Merge pull request #50 from pyexcel/dev

release 0.5.8
This commit is contained in:
jaska 2018-08-16 19:31:22 +01:00 committed by GitHub
commit 0d1151e79c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 274 additions and 44 deletions

View File

@ -4,7 +4,7 @@ configuration:
- "commons/templates"
- "setupmobans/templates"
- ".moban.d"
configuration: pyexcel_io.yaml
configuration: pyexcel-io.yml
targets:
- "docs/source/conf.py": "docs/source/conf.py"
- setup.py: setup.py

View File

@ -4,6 +4,7 @@ notifications:
email: false
python:
- pypy-5.3.1
- 3.7-dev
- 3.6
- 3.5
- 3.4

View File

@ -1,6 +1,16 @@
Change log
================================================================================
0.5.8 - unreleased
--------------------------------------------------------------------------------
fixed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#. `#49 <https://github.com/pyexcel/pyexcel-io/issues/49>`_, support additional
options when detecting float values in csv format: default_float_nan and
ignore_nan_text
0.5.7 - 02.05.2018
--------------------------------------------------------------------------------

View File

@ -13,7 +13,7 @@ that the following conditions are met:
and/or other materials provided with the distribution.
* Neither the name of 'pyexcel-io' nor the names of the contributors
may not be used to endorse or promote products derived from this software
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND
@ -27,4 +27,4 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
DAMAGE.

View File

@ -1,6 +1,12 @@
name: pyexcel-io
organisation: pyexcel
releases:
- changes:
- action: fixed
details:
- '`#49`, support additional options when detecting float values in csv format. default_float_nan, ignore_nan_text'
date: unreleased
version: 0.5.8
- changes:
- action: fixed
details:

View File

@ -4,26 +4,189 @@ DESCRIPTION = (
'format and to/from databases' +
''
)
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = u'pyexcel-io'
copyright = u'2015-2018 Onni Software Ltd.'
author = u'C.W.'
# The short X.Y version
version = u'0.5.8'
# The full version, including alpha/beta/rc tags
release = u'0.5.8'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.viewcode',
'sphinx.ext.autosummary'
]
intersphinx_mapping = {
'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None),
}
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
project = u'pyexcel-io'
copyright = u'2015-2018 Onni Software Ltd.'
version = '0.5.7'
release = '0.5.7'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'en'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'pyexcel-iodoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'pyexcel-io.tex', u'pyexcel-io Documentation',
u'Onni Software Ltd.', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pyexcel-io', u'pyexcel-io Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'pyexcel-io', u'pyexcel-io Documentation',
author, 'pyexcel-io', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}
# TODO: html_theme not configurable upstream
html_theme = 'default'
@ -31,19 +194,8 @@ def setup(app):
app.add_stylesheet('theme_overrides.css')
html_static_path = ['_static']
htmlhelp_basename = 'pyexcel-iodoc'
latex_elements = {}
latex_documents = [
('index', 'pyexcel-io.tex',
'pyexcel-io Documentation',
'Onni Software Ltd.', 'manual'),
]
man_pages = [
('index', 'pyexcel-io',
'pyexcel-io Documentation',
[u'Onni Software Ltd.'], 1)
]
# TODO: DESCRIPTION not configurable upstream
texinfo_documents = [
('index', 'pyexcel-io',
'pyexcel-io Documentation',
@ -51,3 +203,6 @@ texinfo_documents = [
DESCRIPTION,
'Miscellaneous'),
]
intersphinx_mapping.update({
'pyexcel': ('http://pyexcel.readthedocs.io/en/latest/', None),
})

View File

@ -1,9 +1,9 @@
overrides: "pyexcel.yaml"
name: "pyexcel-io"
nick_name: io
version: 0.5.7
current_version: 0.5.7
release: 0.5.7
version: 0.5.8
current_version: 0.5.8
release: 0.5.8
dependencies:
- ordereddict;python_version<"2.7"
- lml==0.0.1

View File

@ -32,6 +32,9 @@ def iget_data(afile, file_type=None, **keywords):
:param auto_detect_int: defaults to True
:param auto_detect_datetime: defaults to True
:param ignore_infinity: defaults to True
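:param ignore_nan_text: if True, any spelling of 'nan' ('NaN', 'nan', ...)
is kept as text instead of being converted to float
:param default_float_nan: the exact spelling of 'nan' ('NaN', 'nan', ...)
to convert to float nan; other spellings are kept as text
:param pep_0515_off: turn off pep 0515. defaults to True.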
:param keywords: any other library specific parameters
:returns: an ordered dictionary
"""

View File

@ -131,6 +131,8 @@ class CSVSheetReader(SheetReader):
auto_detect_int=True,
auto_detect_datetime=True,
pep_0515_off=True,
ignore_nan_text=False,
default_float_nan=None,
**keywords
):
SheetReader.__init__(self, sheet, **keywords)
@ -141,6 +143,8 @@ class CSVSheetReader(SheetReader):
self.__auto_detect_datetime = auto_detect_datetime
self.__file_handle = None
self.__pep_0515_off = pep_0515_off
self.__ignore_nan_text = ignore_nan_text
self.__default_float_nan = default_float_nan
def get_file_handle(self):
""" return me unicode reader for csv """
@ -164,7 +168,9 @@ class CSVSheetReader(SheetReader):
ret = service.detect_int_value(csv_cell_text, self.__pep_0515_off)
if ret is None and self.__auto_detect_float:
ret = service.detect_float_value(
csv_cell_text, self.__pep_0515_off
csv_cell_text, self.__pep_0515_off,
ignore_nan_text=self.__ignore_nan_text,
default_float_nan=self.__default_float_nan
)
shall_we_ignore_the_conversion = (
(ret in [float("inf"), float("-inf")])

View File

@ -39,7 +39,10 @@ def detect_date_value(cell_text):
return ret
def detect_float_value(cell_text, pep_0515_off=True):
def detect_float_value(
cell_text, pep_0515_off=True,
ignore_nan_text=False,
default_float_nan=None):
should_we_skip_it = (
cell_text.startswith("0") and cell_text.startswith("0.") is False
)
@ -54,7 +57,19 @@ def detect_float_value(cell_text, pep_0515_off=True):
return None
try:
return float(cell_text)
if ignore_nan_text:
if cell_text.lower() == "nan":
return None
else:
return float(cell_text)
else:
if cell_text.lower() == "nan":
if cell_text == default_float_nan:
return float("NaN")
else:
return None
else:
return float(cell_text)
except ValueError:
return None

View File

@ -1,4 +1,6 @@
# Template by setupmobans
#!/usr/bin/env python3
# Template by pypi-mobans
import os
import sys
import codecs
@ -9,7 +11,7 @@ PY26 = PY2 and sys.version_info[1] < 7
NAME = 'pyexcel-io'
AUTHOR = 'C.W.'
VERSION = '0.5.7'
VERSION = '0.5.8'
EMAIL = 'wangc_2011@hotmail.com'
LICENSE = 'New BSD'
DESCRIPTION = (
@ -17,7 +19,7 @@ DESCRIPTION = (
'format and to/from databases'
)
URL = 'https://github.com/pyexcel/pyexcel-io'
DOWNLOAD_URL = '%s/archive/0.5.7.tar.gz' % URL
DOWNLOAD_URL = '%s/archive/0.5.8.tar.gz' % URL
FILES = ['README.rst', 'CHANGELOG.rst']
KEYWORDS = [
'API',
@ -27,12 +29,10 @@ KEYWORDS = [
'csvz',
'django',
'sqlalchemy',
'python'
'python',
]
CLASSIFIERS = [
'Topic :: Office/Business',
'Topic :: Utilities',
'Topic :: Software Development :: Libraries',
'Programming Language :: Python',
'Intended Audience :: Developers',
@ -63,8 +63,8 @@ EXTRAS_REQUIRE = {
# You do not need to read beyond this line
PUBLISH_COMMAND = '{0} setup.py sdist bdist_wheel upload -r pypi'.format(
sys.executable)
GS_COMMAND = ('gs pyexcel-io v0.5.7 ' +
"Find 0.5.7 in changelog for more details")
GS_COMMAND = ('gs pyexcel-io v0.5.8 ' +
"Find 0.5.8 in changelog for more details")
NO_GS_MESSAGE = ('Automatic github release is disabled. ' +
'Please install gease to enable it.')
UPLOAD_FAILED_MSG = (
@ -140,7 +140,8 @@ def read_files(*files):
def read(afile):
"""Read a file into setup"""
with codecs.open(afile, 'r', 'utf-8') as opened_file:
the_relative_file = os.path.join(HERE, afile)
with codecs.open(the_relative_file, 'r', 'utf-8') as opened_file:
content = filter_out_test_code(opened_file)
content = "".join(list(content))
return content

View File

@ -1,2 +1,2 @@
pip freeze
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst tests docs/source pyexcel_io && flake8 . --exclude=.moban.d --builtins=unicode,xrange,long
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long

View File

@ -1,2 +1,2 @@
pip freeze
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst tests docs/source pyexcel_io && flake8 . --exclude=.moban.d --builtins=unicode,xrange,long
nosetests --with-coverage --cover-package pyexcel_io --cover-package tests --with-doctest --doctest-extension=.rst README.rst docs/source pyexcel_io && flake8 . --exclude=.moban.d,docs --builtins=unicode,xrange,long

View File

@ -4,6 +4,7 @@ from unittest import TestCase
from textwrap import dedent
import pyexcel as pe
from pyexcel_io._compact import text_type
class TestDateFormat(TestCase):
@ -125,13 +126,30 @@ class TestSpecialStrings(TestCase):
"""
def setUp(self):
self.content = [['01', 1, 2.0, 3.1]]
self.content = [['01', 1, 2.0, 3.1, 'NaN', 'nan']]
self.test_file = "test_auto_detect_init.csv"
pe.save_as(array=self.content, dest_file_name=self.test_file)
def test_auto_detect_float_false(self):
def test_auto_detect_float_true(self):
sheet = pe.get_sheet(file_name=self.test_file)
self.assertEqual(sheet.to_array(), [['01', 1, 2, 3.1]])
self.assertEqual(sheet.to_array(),
[['01', 1, 2, 3.1, 'NaN', 'nan']])
def test_auto_detect_float_false(self):
    """With float detection off, float-looking cells are read back as text."""
    expected = [['01', 1, '2.0', '3.1', 'NaN', 'nan']]
    sheet = pe.get_sheet(file_name=self.test_file, auto_detect_float=False)
    self.assertEqual(sheet.to_array(), expected)
def test_auto_detect_float_ignore_nan_text(self):
    """With ignore_nan_text=True, both 'NaN' and 'nan' cells stay as text."""
    expected = [['01', 1, 2.0, 3.1, 'NaN', 'nan']]
    sheet = pe.get_sheet(file_name=self.test_file, ignore_nan_text=True)
    self.assertEqual(sheet.to_array(), expected)
def test_auto_detect_float_default_float_nan(self):
    """Only the cell spelled exactly like default_float_nan becomes a float."""
    sheet = pe.get_sheet(file_name=self.test_file, default_float_nan="nan")
    first_row = sheet.to_array()[0]
    # Column 5 holds 'nan' (matches the option); column 4 holds 'NaN'.
    assert isinstance(first_row[5], float)
    assert isinstance(first_row[4], text_type)
def tearDown(self):
    """Delete the CSV file named by self.test_file after each test."""
    os.remove(self.test_file)

View File

@ -74,3 +74,18 @@ def test_suppression_of_pep_0515_float():
eq_(result, None)
result = detect_float_value('123_123.1')
eq_(result, None)
def test_detect_float_value_on_nan():
    """With ignore_nan_text=True, 'NaN' is not detected as a float."""
    eq_(detect_float_value('NaN', ignore_nan_text=True), None)
def test_detect_float_value_on_custom_nan_text():
    """'NaN' differs in case from default_float_nan='nan', so it is rejected."""
    eq_(detect_float_value('NaN', default_float_nan="nan"), None)
def test_detect_float_value_on_custom_nan_text2():
    """'nan' matches default_float_nan='nan' exactly and becomes float nan."""
    nan_value = detect_float_value('nan', default_float_nan="nan")
    # nan never compares equal to itself, so compare its string form instead.
    eq_(str(nan_value), "nan")