normaliser les numéros de téléphone dans l'index FTS (#76875) #259
|
@ -5312,10 +5312,15 @@ def test_fts_phone(pub):
|
|||
formdata.just_created()
|
||||
formdata.store()
|
||||
|
||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 1
|
||||
formdata = formdef.data_class()()
|
||||
formdata.data = {'1': None, '2': '0123456789'}
|
||||
formdata.just_created()
|
||||
formdata.store()
|
||||
|
||||
assert formdef.data_class().count([FtsMatch('01 23 45 67 89')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('0123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789')]) == 2
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 foo')]) == 1
|
||||
assert formdef.data_class().count([FtsMatch('+33(0)123456789 bar')]) == 0
|
||||
assert formdef.data_class().count([FtsMatch('foo +33(0)123456789')]) == 1
|
||||
|
|
|
@ -2619,6 +2619,11 @@ class SqlDataMixin(SqlMixin):
|
|||
if isinstance(value, str) and len(value) < 10000:
|
||||
# avoid overlong strings, typically base64-encoded values
|
||||
fts_strings[weight].add(value)
|
||||
# normalize values looking like phonenumbers, because
|
||||
# phonenumbers are normalized by the FTS criteria
|
||||
if len(value) < 30 and value != normalize_phone_number_for_fts_if_needed(value):
|
||||
bdauvergne marked this conversation as resolved
Outdated
|
||||
# use weight 'D' to give preference to fields with the phonenumber validation
|
||||
fts_strings['D'].add(normalize_phone_number_for_fts_if_needed(value))
|
||||
elif type(value) in (tuple, list):
|
||||
for val in value:
|
||||
fts_strings[weight].add(val)
|
||||
|
|
|
@ -344,26 +344,33 @@ class ILike(Criteria):
|
|||
return '%s ILIKE %%(c%s)s' % (self.attribute, id(self.value))
|
||||
|
||||
|
||||
# Loose detector for a substring that "looks like" a phone number inside a
# larger string; the candidate is exposed through the named group "phone".
phone_re = re.compile(
    r'''.*?(?P<phone>          # a phone number
        ((\+[1-9])|(\b0))      # starting with an international prefix, or 0
        [-\(\)\d\.\s/]{6,20}   # then a bunch of numbers/symbols
        \b)                    # till the end of the "word"''',
    re.X,
)


def normalize_phone_number_for_fts_if_needed(value):
    """Return *value* with its first phone-like substring normalized.

    If *value* contains something that looks like a phone number, that
    substring is replaced by the result of
    ``misc.normalize_phone_number_for_fts`` (its "international/E164" form,
    to match what is stored in the database). Otherwise *value* is returned
    unchanged, so callers can compare the result with the input to know
    whether a normalization happened.

    :param value: string to inspect (a search query or a field value).
    :return: the possibly rewritten string.
    """
    # search() rather than match(): match() is anchored at the start of the
    # string and the pattern's leading ".*?" cannot cross a newline, so a
    # number located after a line break would never be detected. Whenever
    # match() used to succeed, search() returns the very same match.
    phone_match = phone_re.search(value)
    if not phone_match:
        return value

    # the character class accepts whitespace, so trim the captured candidate
    candidate = phone_match.group('phone').strip()

    # a bare "digits-digits" token is deliberately left untouched
    # (presumably a range or a reference rather than a phone number)
    if re.match(r'^\d+-\d+$', candidate):
        return value

    # it looks like a phone number: rewrite every occurrence of the candidate
    # with its normalized form
    return value.replace(candidate, misc.normalize_phone_number_for_fts(candidate))
|
||||
|
||||
|
||||
class FtsMatch(Criteria):
|
||||
def __init__(self, value, extra_normalize=True, **kwargs):
|
||||
# make Criteria.__repr__ works
|
||||
self.attribute = 'fts'
|
||||
self.value = self.get_fts_value(value)
|
||||
if extra_normalize:
|
||||
phone_match = re.match(
|
||||
r'''.*?(?P<phone> # a phone number
|
||||
((\+[1-9])|(\b0)) # starting with an international prefix, or 0
|
||||
[-\(\)\d\.\s/]{6,20} # then a bunch of numbers/symbols
|
||||
\b) # till the end of the "word"''',
|
||||
self.value,
|
||||
re.X,
|
||||
)
|
||||
if phone_match and not re.match(r'^\d+-\d+$', phone_match.group('phone').strip()):
|
||||
# if it looks like a phone number, normalize it to its
|
||||
# "international/E164" format to match what's stored in the
|
||||
# database.
|
||||
phone_value = misc.normalize_phone_number_for_fts(phone_match.group('phone').strip())
|
||||
self.value = self.value.replace(phone_match.group('phone').strip(), phone_value)
|
||||
self.value = normalize_phone_number_for_fts_if_needed(self.value)
|
||||
|
||||
@classmethod
|
||||
def get_fts_value(cls, value):
|
||||
|
|
Loading…
Reference in New Issue
Ça m'irait bien d'ajouter à la comparaison un truc genre `len(value) < 30`, pour limiter le nombre de fois où on passe là-dedans.
Comme on peut encore commenter un peu, à la rerelecture je me dis qu'on devrait poser ça en priorité "D" (`fts_strings['D'].add(...)`) pour permettre de prioriser les numéros de téléphone officiels (les champs avec validation téléphonique).
Au doigt mouillé, je n'ai pas l'impression qu'un `len(value) < 30` changera énormément le nombre d'appels ; même avec un `len(value) < 15` (10 chiffres plus des séparateurs entre groupes de deux), ça prendra je pense la plupart des tokens.
Ça par contre ça ne me semble pas poser de souci oui.
Un `< 15` m'irait aussi, je tapais juste large (mon truc étant d'éviter d'envoyer les adresses, les champs commentaires libres, etc. vers la fonction qui va jouer deux regex dessus).
Je retire ce que je dis, c'est la valeur du champ complet qui est traitée, je pensais qu'on faisait un split() avant. Ok, donc.