98 lines
3.1 KiB
Python
98 lines
3.1 KiB
Python
# zoo - versatile objects management
# Copyright (C) 2016 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
from django.utils.translation import ugettext_lazy as _
|
|
from django.contrib.postgres.fields import JSONField
|
|
from django.db import models
|
|
|
|
from .utils import pair_sort
|
|
|
|
|
|
class Duplicate(models.Model):
    """Candidate pair of duplicated entities, with a score and a review state.

    Each row links two ``zoo_data.Entity`` rows (``first`` and ``second``).
    ``save()`` reorders the pair through ``pair_sort`` to enforce the
    invariant ``first_id < second_id``; combined with the ``unique_together``
    constraint on ``(first, second)`` this keeps a pair from being stored in
    both orders.
    """

    # Review states of a duplicate candidate.
    STATE_NEW = 0
    STATE_FALSE_POSITIVE = 1
    STATE_DEDUP = 2

    STATES = [
        (STATE_NEW, _('new')),
        (STATE_FALSE_POSITIVE, _('false positive')),
        (STATE_DEDUP, _('deduplicated')),
    ]

    # Set once on insert; indexed.
    created = models.DateTimeField(
        verbose_name=_('created'),
        auto_now_add=True,
        db_index=True)
    # Refreshed on every save().
    modified = models.DateTimeField(
        verbose_name=_('modified'),
        auto_now=True)

    # The two entities of the pair; no reverse accessor is created on Entity
    # (related_name='+'), and deleting either entity deletes the row.
    first = models.ForeignKey(
        'zoo_data.Entity',
        verbose_name=_('first'),
        related_name='+',
        on_delete=models.CASCADE)
    second = models.ForeignKey(
        'zoo_data.Entity',
        verbose_name=_('second'),
        related_name='+',
        on_delete=models.CASCADE)

    # Up to 5 digits in total, 2 of them after the decimal point.
    score = models.DecimalField(
        decimal_places=2,
        max_digits=5,
        verbose_name=_('score'))
    state = models.PositiveIntegerField(
        verbose_name=_('state'),
        default=STATE_NEW,
        choices=STATES)
    # NOTE(review): nothing in this class sets this timestamp; presumably the
    # caller of dedup() does - confirm.
    deduplicated = models.DateTimeField(
        verbose_name=_('deduplicated'),
        null=True)
    # The default must be a callable: a literal ``{}`` would be one dict
    # object shared by every instance created without explicit content, and
    # dedup() mutates ``self.content`` in place.
    content = JSONField(
        verbose_name=_('content'),
        default=dict)

    def save(self, *args, **kwargs):
        """Save the row after normalising the order of the pair.

        ``first_id`` and ``second_id`` are reordered with ``pair_sort`` to
        enforce the invariant ``first_id < second_id``.

        Raises ``AssertionError`` when both ids are equal (the check is an
        ``assert`` statement, so it is skipped under ``python -O``).
        """
        assert self.first_id != self.second_id, 'same id in duplicate'
        self.first_id, self.second_id = pair_sort(self.first_id, self.second_id)
        return super(Duplicate, self).save(*args, **kwargs)

    def dedup(self, choice):
        """Mark the pair as deduplicated and return ``(keep, forget)``.

        ``choice`` selects the entity to keep: ``1`` keeps ``first``, ``2``
        keeps ``second``. The choice is recorded under
        ``content['dedup_choice']`` and the row is saved before the entities
        are returned.

        Raises ``AssertionError`` when ``choice`` is not 1 or 2, or when the
        current state is not ``STATE_NEW`` (``assert`` statements, skipped
        under ``python -O``).
        """
        assert choice in [1, 2], 'choice is not in [1, 2]'
        assert self.state == self.STATE_NEW, 'state is not new'
        self.state = self.STATE_DEDUP
        self.content['dedup_choice'] = choice
        self.save()
        if choice == 1:
            keep, forget = self.first, self.second
        else:
            keep, forget = self.second, self.first
        return keep, forget

    def false_positive(self):
        """Mark the pair as a false positive and save it.

        Raises ``AssertionError`` when the current state is not
        ``STATE_NEW`` (``assert`` statement, skipped under ``python -O``).
        """
        assert self.state == self.STATE_NEW, 'state is not new'
        self.state = self.STATE_FALSE_POSITIVE
        self.save()

    class Meta:
        verbose_name = _('duplicate')
        verbose_name_plural = _('duplicates')
        # Highest score first; ``id`` breaks ties deterministically.
        ordering = ('-score', 'id')
        unique_together = (('first', 'second'),)
|