sql: normalize phonenumbers in fts index (#76875)
gitea/wcs/pipeline/head This commit looks good
Details
gitea/wcs/pipeline/head This commit looks good
Details
* restricted to values shorter than 30 characters * indexed with weight 'D' to lower their score compared to fields that have phone number validation
This commit is contained in:
parent
87e3e9aa51
commit
2d619766b7
|
@ -5328,10 +5328,15 @@ def test_fts_phone(pub):
|
|||
formdata.just_created()
|
||||
formdata.store()
|
||||
|
||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 1
|
||||
formdata = formdef.data_class()()
|
||||
formdata.data = {'1': None, '2': '0123456789'}
|
||||
formdata.just_created()
|
||||
formdata.store()
|
||||
|
||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 foo')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 bar')]) == 0
|
||||
assert formdef.data_class().count([FtsMatch('foo +33(0)123456789')]) == 1
|
||||
|
|
|
@ -2624,6 +2624,11 @@ class SqlDataMixin(SqlMixin):
|
|||
if isinstance(value, str) and len(value) < 10000:
|
||||
# avoid overlong strings, typically base64-encoded values
|
||||
fts_strings[weight].add(value)
|
||||
# normalize values looking like phonenumbers, because
|
||||
# phonenumbers are normalized by the FTS criteria
|
||||
if len(value) < 30 and value != normalize_phone_number_for_fts_if_needed(value):
|
||||
# use weight 'D' to give preference to fields with the phonenumber validation
|
||||
fts_strings['D'].add(normalize_phone_number_for_fts_if_needed(value))
|
||||
elif type(value) in (tuple, list):
|
||||
for val in value:
|
||||
fts_strings[weight].add(val)
|
||||
|
|
|
@ -343,26 +343,33 @@ class ILike(Criteria):
|
|||
self.value = '%' + like_escape(self.value) + '%'
|
||||
|
||||
|
||||
# Pattern locating the first phone-number-looking run inside a string; the
# run is exposed as the named group "phone" (it may carry trailing
# whitespace, so callers strip it).
phone_re = re.compile(
    r'''.*?(?P<phone> # a phone number
    ((\+[1-9])|(\b0)) # starting with an international prefix, or 0
    [-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols
    \b) # till the end of the "word"''',
    re.X,
)


def normalize_phone_number_for_fts_if_needed(value):
    """Return *value* with its first phone-like run normalized for FTS.

    The string is returned unchanged when ``phone_re`` finds nothing, or
    when the candidate is only digits, a single dash, then digits.
    Otherwise every occurrence of the stripped candidate in *value* is
    replaced by ``misc.normalize_phone_number_for_fts(candidate)``.
    """
    found = phone_re.match(value)
    if not found:
        return value

    candidate = found.group('phone').strip()
    if re.match(r'^\d+-\d+$', candidate):
        # plain "digits-digits" strings are left untouched
        return value

    # it looks like a phone number: normalize it to its
    # "international/E164" format to match what's stored in the database.
    return value.replace(candidate, misc.normalize_phone_number_for_fts(candidate))
|
||||
|
||||
|
||||
class FtsMatch(Criteria):
|
||||
def __init__(self, value, extra_normalize=True, **kwargs):
|
||||
# make Criteria.__repr__ work
|
||||
self.attribute = 'fts'
|
||||
self.value = self.get_fts_value(value)
|
||||
if extra_normalize:
|
||||
phone_match = re.match(
|
||||
r'''.*?(?P<phone> # a phone number
|
||||
((\+[1-9])|(\b0)) # starting with an international prefix, or 0
|
||||
[-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols
|
||||
\b) # till the end of the "word"''',
|
||||
self.value,
|
||||
re.X,
|
||||
)
|
||||
if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()):
|
||||
# if it looks like a phone number, normalize it to its
|
||||
# "international/E164" format to match what's stored in the
|
||||
# database.
|
||||
phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip())
|
||||
self.value = self.value.replace(phone_match.group('phone').strip(), phone_value)
|
||||
self.value = normalize_phone_number_for_fts_if_needed(self.value)
|
||||
|
||||
@classmethod
|
||||
def get_fts_value(cls, value):
|
||||
|
|
Loading…
Reference in New Issue