zoo/zoo/zoo_nanterre/doublons.py

28 lines
842 B
Python

from zoo.zoo_data.models import Entity
def doublons(schema, *args):
    '''Search for duplicate entities based on a list of field paths.

    Each positional argument is a path (a sequence of keys) leading to a
    string value inside an entity's ``content``; it must be one of the paths
    returned by ``schema.paths_to_strings()``. For every entity of the schema,
    the values found at those paths are passed as keyword criteria to
    ``content_search`` and each other entity it returns is reported.

    When no path is given, every string path of the schema is used.

    This is a generator: validation and queries only run once iteration
    starts.

    Yields:
        ``(item, doublon)`` tuples. An entity already reported as a doublon
        is not used again as the reference ``item``.

    Raises:
        ValueError: if an argument is not one of the schema's string paths.
    '''
    qs = Entity.objects.filter(schema=schema)
    paths = schema.paths_to_strings()
    for arg in args:
        if arg not in paths:
            # Wrap in a 1-tuple so that a tuple path is formatted as a whole
            # instead of being unpacked as several format arguments.
            raise ValueError('%s is not a path to a string value' % (arg,))

    # Search on the requested paths only (the previous code ignored them and
    # always used every schema path); fall back to all paths if none given.
    search_paths = args or paths
    # The lookup names do not depend on the entity: compute them once.
    lookups = [('__'.join(path), path) for path in search_paths]

    def value(item, path):
        # Walk down the nested content following each key of the path.
        for elt in path:
            item = item[elt]
        return item

    seen = set()
    for item in qs:
        if item.pk in seen:
            continue
        kwargs = {key: value(item.content, path) for key, path in lookups}
        # NOTE(review): content_search is assumed to match entities whose
        # content equals/resembles the given values - confirm on the manager.
        for doublon in qs.exclude(pk=item.pk).content_search(schema, **kwargs):
            yield item, doublon
            seen.add(doublon.pk)