factor out function to clean text for open document (#36627)

This commit is contained in:
Frédéric Péters 2020-02-08 10:30:39 +01:00
parent a06e04dbf8
commit 9bd888c708
1 changed file with 13 additions and 11 deletions

View File

@ -39,6 +39,17 @@ for prefix, uri in NS.items():
ET.register_namespace(prefix, uri)
# Code points dropped by clean_text(): every character below 0x20 except
# tab, line feed and carriage return, plus U+FFFE and U+FFFF.  Built once at
# import time so each call is a single pass over the string.
_CLEAN_TEXT_TABLE = {
    codepoint: None
    for codepoint in list(range(0x20)) + [0xFFFE, 0xFFFF]
    if codepoint not in (0x09, 0x0A, 0x0D)
}


def clean_text(value):
    """Return *value* with unsupported characters removed.

    Control characters below 0x20 are stripped, except tab, carriage return
    and line feed, which are kept.  U+FFFE and U+FFFF are stripped as well.
    (Presumably these are the characters that are invalid in the generated
    OpenDocument XML -- confirm against the writer that consumes the text.)

    ``value`` must be a text string; every other character is returned
    untouched and in its original order.
    """
    # A mapping of ordinal -> None tells translate() to delete the character.
    return value.translate(_CLEAN_TEXT_TABLE)
def is_number(value):
if value and (value.startswith('0') or value.startswith('+')):
return False
@ -167,19 +178,10 @@ class WorkSheet(object):
class WorkCell(object):
def __init__(self, worksheet, value, formdata=None, data_field=None, native_value=None):
    """Initialise a cell with its sanitised text and associated metadata.

    ``worksheet`` is stored as the cell's owning sheet.  ``value`` is the
    cell content; ``None`` is treated as an empty string, anything else is
    converted to text with ``force_text(value, 'utf-8')`` and then passed
    through ``clean_text()``.  ``formdata``, ``data_field`` and
    ``native_value`` are stored unchanged on the instance.
    """
    self.worksheet = worksheet
    if value is None:
        value = ''
    # The shared clean_text() helper replaces the control-character
    # stripping loop that used to be duplicated inline here; running the
    # helper once gives the same result as the former inline loop followed
    # by a second clean_text() pass.
    self.value = clean_text(force_text(value, 'utf-8'))
    self.formdata = formdata
    self.data_field = data_field
    self.native_value = native_value