improve performance for collecting children/ancestors of entities

We now store the full transitive closure of the parent-child
relationship. For 5040 entities this amounts to nearly 41,000 rows in the
relation table, giving a 3 MB SQLite database.
This commit is contained in:
Benjamin Dauvergne 2012-05-16 13:50:00 +02:00
parent 0bed86ed91
commit 33d8f98e87
4 changed files with 45 additions and 37 deletions

View File

@ -8,7 +8,7 @@ from django.core.management.base import BaseCommand
from polynum.base.models import Entity, EntityRelation
from polynum.utils import slice_collection
from ldif import LDIFParser
from polynum.ldif import LDIFParser
class LdapEntity(object):
def __init__(self, **kwargs):
@ -59,6 +59,13 @@ class Command(BaseCommand):
option_list = BaseCommand.option_list + (
make_option("--delete"),)
def build_parents_relations(self, relations, entity, current=None, direct=True):
    '''Record every ancestor of `entity` in `relations`.

    One (ancestor, entity, direct) tuple is added per ancestor link found
    by following the `parents` attribute upwards. The tuples built from the
    parents of the starting node carry `direct`; all tuples found further
    up carry False.

    relations -- collection with an `add` method (a set) receiving the tuples
    entity    -- the entity whose ancestors are collected
    current   -- node whose parents are walked first; defaults to `entity`
    direct    -- flag stored for the parents of the starting node
    '''
    start = entity if current is None else current
    # Every node is expanded at most once: an ancestor reachable through
    # several paths is not walked again, and a cyclic parent link cannot
    # loop forever.
    expanded = set([start])
    pending = [(start, direct)]
    while pending:
        node, is_direct = pending.pop()
        for parent_entity in node.parents:
            relations.add((parent_entity, entity, is_direct))
            if parent_entity not in expanded:
                expanded.add(parent_entity)
                pending.append((parent_entity, False))
def load_file(self, filepath):
parser = UpdLDIF(file(filepath))
parser.parse()
@ -70,14 +77,24 @@ class Command(BaseCommand):
entity.django_instance = \
Entity(code=entity.code, name=entity.name,
description=entity.description)
i = 0
for sl in slice_collection([entity.django_instance for entity in parser.entities.values()], 200):
Entity.objects.bulk_create(sl)
relations = []
i += len(sl)
print 'Added %06s entities\r' % i,
sys.stdout.flush()
print
relations = set()
for entity in parser.entities.values():
for parent in entity.parents:
relations.append(EntityRelation(parent=parent.django_instance, child=entity.django_instance))
for sl in slice_collection(relations, 50):
self.build_parents_relations(relations, entity)
i = 0
entity_relations = list(EntityRelation(parent=parent.django_instance, child=child.django_instance, direct=direct) for parent, child, direct in relations)
for sl in slice_collection(entity_relations, 50):
EntityRelation.objects.bulk_create(sl)
i += len(sl)
print 'Added %06s entity relations\r' % i,
sys.stdout.flush()
print
def handle(self, *args, **options):
for filepath in args:

View File

@ -8,8 +8,6 @@ Définition des entités administratives
from django.db import models
from django.utils.translation import ugettext_lazy as _
from polynum.utils import slice_collection
class Entity(models.Model):
class Meta:
@ -35,30 +33,15 @@ class Entity(models.Model):
get_accounting.short_description = _(u'Centre financier')
def children(self):
next_level = [self]
result = set()
while next_level:
tmp_next_level = set()
for sl in slice_collection(list(next_level), 50):
tmp_next_level.update(set(Entity.objects.filter(parent_relations__parent__in=sl)))
result.update(tmp_next_level)
next_level = tmp_next_level
return result
return Entity.objects.filter(parent_relations__parent=self)
def ancestors(self):
next_level = [self]
result = set()
while next_level:
tmp_next_level = set()
for sl in slice_collection(list(next_level), 50):
tmp_next_level.update(set(Entity.objects.filter(children_relations__child__in=sl)))
result.update(tmp_next_level)
next_level = tmp_next_level
return result
def parents(self):
return Entity.objects.filter(children_relations__child=self)
class EntityRelation(models.Model):
parent = models.ForeignKey(Entity, related_name='children_relations', verbose_name=_(u'Entité parente'), on_delete=models.DO_NOTHING)
child = models.ForeignKey(Entity, related_name='parent_relations', verbose_name=_(u'Entité fille'), on_delete=models.DO_NOTHING)
direct = models.BooleanField(default=True, blank=True)
class Meta:
app_label = 'base'

View File

@ -49,14 +49,13 @@ class CachingDecorator(object):
class CacheToSession(CachingDecorator):
def cache(self, args, kwargs):
    '''Return the `session` attribute of the `request` keyword argument.

    None is returned when `kwargs` holds no `request` entry or when the
    request object has no `session` attribute. `kwargs` is only read: the
    `request` entry is left in place instead of being popped out of the
    caller's dictionary.
    '''
    request = kwargs.get('request')
    return getattr(request, 'session', None)
def get(self, cache, key, default=None):
    '''Look `key` up in `cache`; return `default` when there is no cache.

    When `cache` is not None the lookup is delegated to its own `get`
    method, which also receives `default`.
    '''
    return default if cache is None else cache.get(key, default)
def set(self, cache, key, value):
    '''Store `value` under `key` in `cache`; do nothing when `cache` is None.'''
    if cache is None:
        return
    cache[key] = value
@ -90,13 +89,22 @@ class CacheToDjangoCache(CachingDecorator):
'''Decorator adding caching to the default Django cache backend'''
cache_to_django = CacheToDjangoCache
def slice_collection(iterable, length):
    '''Cut a collection into chunks holding at most `length` items each.

    Containers exposing `__getslice__` (e.g. Python 2 lists) are cut by
    slicing, so each chunk is whatever the slice returns. Any other iterable
    is consumed item by item and cut into lists. An empty chunk is never
    yielded, so an empty collection yields nothing.

    Raises ValueError when `length` is smaller than 1. As this is a
    generator, the error is raised when iteration starts.
    '''
    if length < 1:
        raise ValueError('length must be at least 1, got %r' % (length,))
    if hasattr(iterable, '__getslice__'):
        start = 0
        while True:
            chunk = iterable[start:start + length]
            size = len(chunk)
            # Skip the empty tail produced when the size of the collection
            # is an exact multiple of `length` (or the collection is empty).
            if size:
                yield chunk
            if size < length:
                break
            start += length
    else:
        chunk = []
        for item in iterable:
            chunk.append(item)
            if len(chunk) == length:
                yield chunk
                chunk = []
        # Flush the last, shorter chunk.
        if chunk:
            yield chunk