✨ support additional options when detecting float values in csv format. fix #49
This commit is contained in:
parent
d2e1e8ad99
commit
8edce68272
|
@ -32,6 +32,9 @@ def iget_data(afile, file_type=None, **keywords):
|
|||
:param auto_detect_int: defaults to True
|
||||
:param auto_detect_datetime: defaults to True
|
||||
:param ignore_infinity: defaults to True
|
||||
:param ignore_nan_text: when True, any spelling of 'nan' ('NaN', 'nan', ...) is kept as text instead of being converted to a float. defaults to False
|
||||
:param default_float_nan: the one exact spelling (e.g. 'nan') that is converted to float NaN; any other spelling of 'nan' is kept as text. defaults to None
|
||||
:param pep_0515_off: turn off pep 0515. defaults to True.
|
||||
:param keywords: any other library specific parameters
|
||||
:returns: an ordered dictionary
|
||||
"""
|
||||
|
|
|
@ -131,6 +131,8 @@ class CSVSheetReader(SheetReader):
|
|||
auto_detect_int=True,
|
||||
auto_detect_datetime=True,
|
||||
pep_0515_off=True,
|
||||
ignore_nan_text=False,
|
||||
default_float_nan=None,
|
||||
**keywords
|
||||
):
|
||||
SheetReader.__init__(self, sheet, **keywords)
|
||||
|
@ -141,6 +143,8 @@ class CSVSheetReader(SheetReader):
|
|||
self.__auto_detect_datetime = auto_detect_datetime
|
||||
self.__file_handle = None
|
||||
self.__pep_0515_off = pep_0515_off
|
||||
self.__ignore_nan_text = ignore_nan_text
|
||||
self.__default_float_nan = default_float_nan
|
||||
|
||||
def get_file_handle(self):
|
||||
""" return a unicode reader for csv """
|
||||
|
@ -164,7 +168,9 @@ class CSVSheetReader(SheetReader):
|
|||
ret = service.detect_int_value(csv_cell_text, self.__pep_0515_off)
|
||||
if ret is None and self.__auto_detect_float:
|
||||
ret = service.detect_float_value(
|
||||
csv_cell_text, self.__pep_0515_off
|
||||
csv_cell_text, self.__pep_0515_off,
|
||||
ignore_nan_text=self.__ignore_nan_text,
|
||||
default_float_nan=self.__default_float_nan
|
||||
)
|
||||
shall_we_ignore_the_conversion = (
|
||||
(ret in [float("inf"), float("-inf")])
|
||||
|
|
|
@ -39,7 +39,10 @@ def detect_date_value(cell_text):
|
|||
return ret
|
||||
|
||||
|
||||
def detect_float_value(cell_text, pep_0515_off=True):
|
||||
def detect_float_value(
|
||||
cell_text, pep_0515_off=True,
|
||||
ignore_nan_text=False,
|
||||
default_float_nan=None):
|
||||
should_we_skip_it = (
|
||||
cell_text.startswith("0") and cell_text.startswith("0.") is False
|
||||
)
|
||||
|
@ -54,7 +57,19 @@ def detect_float_value(cell_text, pep_0515_off=True):
|
|||
return None
|
||||
|
||||
try:
|
||||
return float(cell_text)
|
||||
if ignore_nan_text:
|
||||
if cell_text.lower() == "nan":
|
||||
return None
|
||||
else:
|
||||
return float(cell_text)
|
||||
else:
|
||||
if cell_text.lower() == "nan":
|
||||
if cell_text == default_float_nan:
|
||||
return float("NaN")
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
return float(cell_text)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ from unittest import TestCase
|
|||
from textwrap import dedent
|
||||
|
||||
import pyexcel as pe
|
||||
from pyexcel_io._compact import text_type
|
||||
|
||||
|
||||
class TestDateFormat(TestCase):
|
||||
|
@ -125,13 +126,30 @@ class TestSpecialStrings(TestCase):
|
|||
"""
|
||||
|
||||
def setUp(self):
|
||||
self.content = [['01', 1, 2.0, 3.1]]
|
||||
self.content = [['01', 1, 2.0, 3.1, 'NaN', 'nan']]
|
||||
self.test_file = "test_auto_detect_init.csv"
|
||||
pe.save_as(array=self.content, dest_file_name=self.test_file)
|
||||
|
||||
def test_auto_detect_float_false(self):
|
||||
def test_auto_detect_float_true(self):
|
||||
sheet = pe.get_sheet(file_name=self.test_file)
|
||||
self.assertEqual(sheet.to_array(), [['01', 1, 2, 3.1]])
|
||||
self.assertEqual(sheet.to_array(),
|
||||
[['01', 1, 2, 3.1, 'NaN', 'nan']])
|
||||
|
||||
def test_auto_detect_float_false(self):
|
||||
sheet = pe.get_sheet(file_name=self.test_file, auto_detect_float=False)
|
||||
self.assertEqual(sheet.to_array(),
|
||||
[['01', 1, '2.0', '3.1', 'NaN', 'nan']])
|
||||
|
||||
def test_auto_detect_float_ignore_nan_text(self):
|
||||
sheet = pe.get_sheet(file_name=self.test_file, ignore_nan_text=True)
|
||||
self.assertEqual(sheet.to_array(),
|
||||
[['01', 1, 2.0, 3.1, 'NaN', 'nan']])
|
||||
|
||||
def test_auto_detect_float_default_float_nan(self):
|
||||
sheet = pe.get_sheet(file_name=self.test_file, default_float_nan="nan")
|
||||
result = sheet.to_array()
|
||||
assert isinstance(result[0][5], float)
|
||||
assert isinstance(result[0][4], text_type)
|
||||
|
||||
def tearDown(self):
|
||||
os.unlink(self.test_file)
|
||||
|
|
|
@ -74,3 +74,18 @@ def test_suppression_of_pep_0515_float():
|
|||
eq_(result, None)
|
||||
result = detect_float_value('123_123.1')
|
||||
eq_(result, None)
|
||||
|
||||
|
||||
def test_detect_float_value_on_nan():
|
||||
result = detect_float_value('NaN', ignore_nan_text=True)
|
||||
eq_(result, None)
|
||||
|
||||
|
||||
def test_detect_float_value_on_custom_nan_text():
|
||||
result = detect_float_value('NaN', default_float_nan="nan")
|
||||
eq_(result, None)
|
||||
|
||||
|
||||
def test_detect_float_value_on_custom_nan_text2():
|
||||
result = detect_float_value('nan', default_float_nan="nan")
|
||||
eq_(str(result), "nan")
|
||||
|
|
Loading…
Reference in New Issue