diff --git a/tests/test_formdata.py b/tests/test_formdata.py index f7b8ff181..b1a408de9 100644 --- a/tests/test_formdata.py +++ b/tests/test_formdata.py @@ -5328,10 +5328,15 @@ def test_fts_phone(pub): formdata.just_created() formdata.store() - assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 1 - assert formdef.data_class().count([FtsMatch('0123456789')]) == 1 - assert formdef.data_class().count([FtsMatch('+33123456789')]) == 1 - assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 1 + formdata = formdef.data_class()() + formdata.data = {'1': None, '2': '0123456789'} + formdata.just_created() + formdata.store() + + assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 2 + assert formdef.data_class().count([FtsMatch('0123456789')]) == 2 + assert formdef.data_class().count([FtsMatch('+33123456789')]) == 2 + assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 2 assert formdef.data_class().count([FtsMatch('+33(0)123456789 foo')]) == 1 assert formdef.data_class().count([FtsMatch('+33(0)123456789 bar')]) == 0 assert formdef.data_class().count([FtsMatch('foo +33(0)123456789')]) == 1 diff --git a/wcs/sql.py b/wcs/sql.py index addacd772..9a4c00dd8 100644 --- a/wcs/sql.py +++ b/wcs/sql.py @@ -2624,6 +2624,11 @@ class SqlDataMixin(SqlMixin): if isinstance(value, str) and len(value) < 10000: # avoid overlong strings, typically base64-encoded values fts_strings[weight].add(value) + # normalize values looking like phonenumbers, because + # phonenumbers are normalized by the FTS criteria + if len(value) < 30 and value != normalize_phone_number_for_fts_if_needed(value): + # use weight 'D' to give preference to fields with the phonenumber validation + fts_strings['D'].add(normalize_phone_number_for_fts_if_needed(value)) elif type(value) in (tuple, list): for val in value: fts_strings[weight].add(val) diff --git a/wcs/sql_criterias.py b/wcs/sql_criterias.py index a75f7500a..3333eb2c5 100644 --- a/wcs/sql_criterias.py +++ 
b/wcs/sql_criterias.py @@ -343,26 +343,33 @@ class ILike(Criteria): self.value = '%' + like_escape(self.value) + '%' +phone_re = re.compile( + r'''.*?(?P<phone> # a phone number + ((\+[1-9])|(\b0)) # starting with an international prefix, or 0 + [-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols + \b) # till the end of the "word"''', + re.X, +) + + +def normalize_phone_number_for_fts_if_needed(value): + phone_match = phone_re.match(value) + if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()): + # if it looks like a phone number, normalize it to its + # "international/E164" format to match what's stored in the + # database. + phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip()) + value = value.replace(phone_match.group('phone').strip(), phone_value) + return value + + class FtsMatch(Criteria): def __init__(self, value, extra_normalize=True, **kwargs): # make Criteria.__repr__ works self.attribute = 'fts' self.value = self.get_fts_value(value) if extra_normalize: - phone_match = re.match( - r'''.*?(?P<phone> # a phone number - ((\+[1-9])|(\b0)) # starting with an international prefix, or 0 - [-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols - \b) # till the end of the "word"''', - self.value, - re.X, - ) - if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()): - # if it looks like a phone number, normalize it to its - # "international/E164" format to match what's stored in the - # database. - phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip()) - self.value = self.value.replace(phone_match.group('phone').strip(), phone_value) + self.value = normalize_phone_number_for_fts_if_needed(self.value) @classmethod def get_fts_value(cls, value):