support additional options when detecting float values in csv format. fix #49

This commit is contained in:
chfw 2018-08-16 18:06:48 +01:00
parent d2e1e8ad99
commit 8edce68272
5 changed files with 63 additions and 6 deletions

View File

@ -32,6 +32,9 @@ def iget_data(afile, file_type=None, **keywords):
:param auto_detect_int: defaults to True
:param auto_detect_datetime: defaults to True
:param ignore_infinity: defaults to True
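:param ignore_nan_text: if True, 'nan' in any letter case (e.g. 'NaN', 'nan') is kept as text instead of being converted to float. defaults to False
:param default_float_nan: the exact text (e.g. 'nan') that is read as float NaN when ignore_nan_text is False; other spellings are kept as text. defaults to None
:param pep_0515_off: turn off PEP 515 (underscores in numeric literals) when detecting int and float values. defaults to True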
:param keywords: any other library specific parameters
:returns: an ordered dictionary
"""

View File

@ -131,6 +131,8 @@ class CSVSheetReader(SheetReader):
auto_detect_int=True,
auto_detect_datetime=True,
pep_0515_off=True,
ignore_nan_text=False,
default_float_nan=None,
**keywords
):
SheetReader.__init__(self, sheet, **keywords)
@ -141,6 +143,8 @@ class CSVSheetReader(SheetReader):
self.__auto_detect_datetime = auto_detect_datetime
self.__file_handle = None
self.__pep_0515_off = pep_0515_off
self.__ignore_nan_text = ignore_nan_text
self.__default_float_nan = default_float_nan
def get_file_handle(self):
""" return me unicde reader for csv """
@ -164,7 +168,9 @@ class CSVSheetReader(SheetReader):
ret = service.detect_int_value(csv_cell_text, self.__pep_0515_off)
if ret is None and self.__auto_detect_float:
ret = service.detect_float_value(
csv_cell_text, self.__pep_0515_off
csv_cell_text, self.__pep_0515_off,
ignore_nan_text=self.__ignore_nan_text,
default_float_nan=self.__default_float_nan
)
shall_we_ignore_the_conversion = (
(ret in [float("inf"), float("-inf")])

View File

@ -39,7 +39,10 @@ def detect_date_value(cell_text):
return ret
def detect_float_value(cell_text, pep_0515_off=True):
def detect_float_value(
cell_text, pep_0515_off=True,
ignore_nan_text=False,
default_float_nan=None):
should_we_skip_it = (
cell_text.startswith("0") and cell_text.startswith("0.") is False
)
@ -54,7 +57,19 @@ def detect_float_value(cell_text, pep_0515_off=True):
return None
try:
return float(cell_text)
if ignore_nan_text:
if cell_text.lower() == "nan":
return None
else:
return float(cell_text)
else:
if cell_text.lower() == "nan":
if cell_text == default_float_nan:
return float("NaN")
else:
return None
else:
return float(cell_text)
except ValueError:
return None

View File

@ -4,6 +4,7 @@ from unittest import TestCase
from textwrap import dedent
import pyexcel as pe
from pyexcel_io._compact import text_type
class TestDateFormat(TestCase):
@ -125,13 +126,30 @@ class TestSpecialStrings(TestCase):
"""
def setUp(self):
self.content = [['01', 1, 2.0, 3.1]]
self.content = [['01', 1, 2.0, 3.1, 'NaN', 'nan']]
self.test_file = "test_auto_detect_init.csv"
pe.save_as(array=self.content, dest_file_name=self.test_file)
def test_auto_detect_float_false(self):
def test_auto_detect_float_true(self):
sheet = pe.get_sheet(file_name=self.test_file)
self.assertEqual(sheet.to_array(), [['01', 1, 2, 3.1]])
self.assertEqual(sheet.to_array(),
[['01', 1, 2, 3.1, 'NaN', 'nan']])
def test_auto_detect_float_false(self):
sheet = pe.get_sheet(file_name=self.test_file, auto_detect_float=False)
self.assertEqual(sheet.to_array(),
[['01', 1, '2.0', '3.1', 'NaN', 'nan']])
def test_auto_detect_float_ignore_nan_text(self):
sheet = pe.get_sheet(file_name=self.test_file, ignore_nan_text=True)
self.assertEqual(sheet.to_array(),
[['01', 1, 2.0, 3.1, 'NaN', 'nan']])
def test_auto_detect_float_default_float_nan(self):
sheet = pe.get_sheet(file_name=self.test_file, default_float_nan="nan")
result = sheet.to_array()
assert isinstance(result[0][5], float)
assert isinstance(result[0][4], text_type)
def tearDown(self):
os.unlink(self.test_file)

View File

@ -74,3 +74,18 @@ def test_suppression_of_pep_0515_float():
eq_(result, None)
result = detect_float_value('123_123.1')
eq_(result, None)
def test_detect_float_value_on_nan():
    # With ignore_nan_text switched on, 'NaN' must not be turned into a float.
    eq_(detect_float_value('NaN', ignore_nan_text=True), None)
def test_detect_float_value_on_custom_nan_text():
    # 'NaN' differs in case from the chosen default_float_nan ("nan"),
    # so no float is detected.
    eq_(detect_float_value('NaN', default_float_nan="nan"), None)
def test_detect_float_value_on_custom_nan_text2():
    # Text that equals default_float_nan exactly is detected as float NaN,
    # whose string form is "nan".
    detected = detect_float_value('nan', default_float_nan="nan")
    eq_(str(detected), "nan")