sql: normalize phonenumbers in fts index (#76875)
gitea/wcs/pipeline/head This commit looks good
Details
gitea/wcs/pipeline/head This commit looks good
Details
This commit is contained in:
parent
e7737a162c
commit
d94a0f0379
|
@ -4454,10 +4454,15 @@ def test_fts_phone(pub):
|
|||
formdata.just_created()
|
||||
formdata.store()
|
||||
|
||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 1
|
||||
formdata = formdef.data_class()()
|
||||
formdata.data = {'1': None, '2': '0123456789'}
|
||||
formdata.just_created()
|
||||
formdata.store()
|
||||
|
||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 foo')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 bar')]) == 0
|
||||
assert formdef.data_class().count([FtsMatch('foo +33(0)123456789')]) == 1
|
||||
|
|
38
wcs/sql.py
38
wcs/sql.py
|
@ -424,26 +424,33 @@ class ILike(Criteria):
|
|||
return '%s ILIKE %%(c%s)s' % (self.attribute, id(self.value))
|
||||
|
||||
|
||||
# Loose detector for something that looks like a phone number inside a larger
# string.  Compiled once at module level and reused by
# normalize_phone_number_for_fts_if_needed().  The pattern is written in
# verbose mode (re.X), so the layout and the inline comments below are not
# part of the expression.
phone_re = re.compile(
    r'''.*?(?P<phone>            # a phone number
        ((\+[1-9])|(\b0))        # starting with an international prefix, or 0
        [-\(\)\d\.\s/]{6,20}     # then a bunch of numbers/symbols
        \b)                      # till the end of the "word"''',
    re.X,
)


def normalize_phone_number_for_fts_if_needed(value):
    """Return *value* with an embedded phone number rewritten for FTS.

    The first phone-looking span found by ``phone_re`` is passed to
    ``misc.normalize_phone_number_for_fts`` and every occurrence of that span
    in *value* is replaced by the result.  The rest of the string is kept
    untouched.

    *value* is returned unchanged when no phone-looking span is found, or
    when the span is just ``digits-digits`` (presumably an identifier rather
    than a phone number -- to be confirmed against callers).
    """
    phone_match = phone_re.match(value)
    if not phone_match:
        return value

    # the symbol class of the pattern accepts whitespace, so the captured
    # span may carry trailing blanks; work on the stripped form.
    phone = phone_match.group('phone').strip()
    if re.match(r'^\d+-\d+$', phone):
        return value

    # if it looks like a phone number, normalize it to its
    # "international/E164" format to match what's stored in the
    # database.
    return value.replace(phone, misc.normalize_phone_number_for_fts(phone))
|
||||
|
||||
|
||||
class FtsMatch(Criteria):
|
||||
def __init__(self, value, extra_normalize=True, **kwargs):
|
||||
# make Criteria.__repr__ work
|
||||
self.attribute = 'fts'
|
||||
self.value = self.get_fts_value(value)
|
||||
if extra_normalize:
|
||||
phone_match = re.match(
|
||||
r'''.*?(?P<phone> # a phone number
|
||||
((\+[1-9])|(\b0)) # starting with an international prefix, or 0
|
||||
[-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols
|
||||
\b) # till the end of the "word"''',
|
||||
self.value,
|
||||
re.X,
|
||||
)
|
||||
if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()):
|
||||
# if it looks like a phone number, normalize it to its
|
||||
# "international/E164" format to match what's stored in the
|
||||
# database.
|
||||
phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip())
|
||||
self.value = self.value.replace(phone_match.group('phone').strip(), phone_value)
|
||||
self.value = normalize_phone_number_for_fts_if_needed(self.value)
|
||||
|
||||
@classmethod
|
||||
def get_fts_value(cls, value):
|
||||
|
@ -2843,6 +2850,9 @@ class SqlDataMixin(SqlMixin):
|
|||
if isinstance(value, str) and len(value) < 10000:
|
||||
# avoid overlong strings, typically base64-encoded values
|
||||
fts_strings[weight].add(value)
|
||||
# special case telephone numbers
|
||||
if value != normalize_phone_number_for_fts_if_needed(value):
|
||||
fts_strings[weight].add(normalize_phone_number_for_fts_if_needed(value))
|
||||
elif type(value) in (tuple, list):
|
||||
for val in value:
|
||||
fts_strings[weight].add(val)
|
||||
|
|
Loading…
Reference in New Issue