sql: normalize phone numbers in FTS index (#76875)
gitea/wcs/pipeline/head This commit looks good
Details
gitea/wcs/pipeline/head This commit looks good
Details
* restricted to values shorter than 30 characters
* indexed with weight 'D' to lower their score compared to fields with phone number validation
This commit is contained in:
parent
87e3e9aa51
commit
2d619766b7
|
@ -5328,10 +5328,15 @@ def test_fts_phone(pub):
|
||||||
formdata.just_created()
|
formdata.just_created()
|
||||||
formdata.store()
|
formdata.store()
|
||||||
|
|
||||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 1
|
formdata = formdef.data_class()()
|
||||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 1
|
formdata.data = {'1': None, '2': '0123456789'}
|
||||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 1
|
formdata.just_created()
|
||||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 1
|
formdata.store()
|
||||||
|
|
||||||
|
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 2
|
||||||
|
assert formdef.data_class().count([FtsMatch('0123456789')]) == 2
|
||||||
|
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 2
|
||||||
|
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 2
|
||||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 foo')]) == 1
|
assert formdef.data_class().count([FtsMatch('+33(0)123456789 foo')]) == 1
|
||||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 bar')]) == 0
|
assert formdef.data_class().count([FtsMatch('+33(0)123456789 bar')]) == 0
|
||||||
assert formdef.data_class().count([FtsMatch('foo +33(0)123456789')]) == 1
|
assert formdef.data_class().count([FtsMatch('foo +33(0)123456789')]) == 1
|
||||||
|
|
|
@ -2624,6 +2624,11 @@ class SqlDataMixin(SqlMixin):
|
||||||
if isinstance(value, str) and len(value) < 10000:
|
if isinstance(value, str) and len(value) < 10000:
|
||||||
# avoid overlong strings, typically base64-encoded values
|
# avoid overlong strings, typically base64-encoded values
|
||||||
fts_strings[weight].add(value)
|
fts_strings[weight].add(value)
|
||||||
|
# normalize values looking like phonenumbers, because
|
||||||
|
# phonenumbers are normalized by the FTS criteria
|
||||||
|
if len(value) < 30 and value != normalize_phone_number_for_fts_if_needed(value):
|
||||||
|
# use weight 'D' to give preference to fields with the phonenumber validation
|
||||||
|
fts_strings['D'].add(normalize_phone_number_for_fts_if_needed(value))
|
||||||
elif type(value) in (tuple, list):
|
elif type(value) in (tuple, list):
|
||||||
for val in value:
|
for val in value:
|
||||||
fts_strings[weight].add(val)
|
fts_strings[weight].add(val)
|
||||||
|
|
|
@ -343,26 +343,33 @@ class ILike(Criteria):
|
||||||
self.value = '%' + like_escape(self.value) + '%'
|
self.value = '%' + like_escape(self.value) + '%'
|
||||||
|
|
||||||
|
|
||||||
|
phone_re = re.compile(
|
||||||
|
r'''.*?(?P<phone> # a phone number
|
||||||
|
((\+[1-9])|(\b0)) # starting with an international prefix, or 0
|
||||||
|
[-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols
|
||||||
|
\b) # till the end of the "word"''',
|
||||||
|
re.X,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_phone_number_for_fts_if_needed(value):
|
||||||
|
phone_match = phone_re.match(value)
|
||||||
|
if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()):
|
||||||
|
# if it looks like a phone number, normalize it to its
|
||||||
|
# "international/E164" format to match what's stored in the
|
||||||
|
# database.
|
||||||
|
phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip())
|
||||||
|
value = value.replace(phone_match.group('phone').strip(), phone_value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
class FtsMatch(Criteria):
|
class FtsMatch(Criteria):
|
||||||
def __init__(self, value, extra_normalize=True, **kwargs):
|
def __init__(self, value, extra_normalize=True, **kwargs):
|
||||||
# make Criteria.__repr__ works
|
# make Criteria.__repr__ works
|
||||||
self.attribute = 'fts'
|
self.attribute = 'fts'
|
||||||
self.value = self.get_fts_value(value)
|
self.value = self.get_fts_value(value)
|
||||||
if extra_normalize:
|
if extra_normalize:
|
||||||
phone_match = re.match(
|
self.value = normalize_phone_number_for_fts_if_needed(self.value)
|
||||||
r'''.*?(?P<phone> # a phone number
|
|
||||||
((\+[1-9])|(\b0)) # starting with an international prefix, or 0
|
|
||||||
[-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols
|
|
||||||
\b) # till the end of the "word"''',
|
|
||||||
self.value,
|
|
||||||
re.X,
|
|
||||||
)
|
|
||||||
if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()):
|
|
||||||
# if it looks like a phone number, normalize it to its
|
|
||||||
# "international/E164" format to match what's stored in the
|
|
||||||
# database.
|
|
||||||
phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip())
|
|
||||||
self.value = self.value.replace(phone_match.group('phone').strip(), phone_value)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_fts_value(cls, value):
|
def get_fts_value(cls, value):
|
||||||
|
|
Loading…
Reference in New Issue