This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
polynum/polynum/base/management/commands/loadentities.py

250 lines
9.6 KiB
Python

# vim:spell:spelllang=fr
# -*- encoding: utf-8 -*-
import sys
from optparse import make_option
import xml.etree.ElementTree as etree
from django.core.management.base import BaseCommand
from django.db import transaction
from polynum.base.models import EntityType, Entity
from polynum.utils import slice_collection
from polynum.ldif import LDIFParser
class LdapEntity(object):
    """Plain attribute holder describing one parsed entity.

    Every instance starts with ``depth`` set to 0 and ``children`` set to a
    fresh empty list; the keyword arguments are then copied onto the instance
    and may override those two defaults.
    """

    def __init__(self, **kwargs):
        attributes = vars(self)
        attributes.update(depth=0, children=[])
        # Applied last so that caller-supplied values win over the defaults.
        attributes.update(kwargs)

    def __unicode__(self):
        """Return every instance attribute rendered as ``key=value``."""
        pairs = [u'%s=%s' % item for item in self.__dict__.items()]
        return u'<LdapEntity: %s>' % u', '.join(pairs)
class BaseParser(object):
    """Shared post-processing for parsers that fill ``self.entities``.

    ``self.entities`` is expected to map an entity code to an object exposing
    ``code``, ``parents`` (a sequence of entity codes) and ``children`` (a
    list).
    """

    def finish(self):
        """Link the parsed entities together and print the final count.

        Runs three passes over ``self.entities``:

        1. replace each entity's list of parent codes by the parent objects
           (raises ``KeyError`` when a code is not a key of ``self.entities``);
        2. append each entity to the ``children`` of each of its parents,
           skipping entities already present;
        3. sort every ``children`` list by ``code``.
        """
        by_code = self.entities

        for node in by_code.values():
            node.parents = [by_code[parent_code] for parent_code in node.parents]

        for node in by_code.values():
            for parent in node.parents:
                if node in parent.children:
                    continue
                parent.children.append(node)

        for node in by_code.values():
            node.children.sort(key=lambda child: child.code)

        print('Parsed %06s\r' % len(by_code))
class UpdXML(BaseParser):
    """Parser building ``LdapEntity`` objects from an XML document.

    Each ``structure`` element returned by ``findall('structure')`` becomes
    one entity, stored in ``self.entities`` under the lower-cased value of its
    ``code`` attribute.
    """

    def __init__(self, input):
        # ``input`` is handed unchanged to ``ElementTree.parse`` by ``parse()``.
        self.input = input
        self.entities = {}

    def parse(self):
        """Read every ``structure`` element, then call ``finish()``.

        For each element the ``libelle_long``, ``libelle_court`` and ``type``
        children provide the description, name and type; the text of every
        ``pere`` child, lower-cased, provides the parent codes.

        Raises ``AttributeError`` when the ``code`` attribute or one of the
        three mandatory children is missing, or when a ``pere`` element has
        no text.
        """
        element_tree = etree.parse(self.input)
        for structure in element_tree.findall('structure'):
            code = structure.get('code').lower()
            libelle_long = structure.find('libelle_long').text
            libelle_court = structure.find('libelle_court').text
            type_ = structure.find('type').text
            # findall() always returns a list (possibly empty), never None,
            # so the parent codes can be built unconditionally.
            parent_codes = [pere.text.lower()
                            for pere in structure.findall('pere')]
            # The parent code 'sri' is dropped (presumably a placeholder
            # above the real root -- TODO confirm).
            parent_codes = [parent_code for parent_code in parent_codes
                            if parent_code != 'sri']
            self.entities[code] = LdapEntity(code=code,
                                             name=libelle_court,
                                             parents=parent_codes,
                                             description=libelle_long,
                                             type_=type_)
            count = len(self.entities)
            if count % 100 == 0:
                # Progress line; the trailing '\r' lets the next one
                # overwrite it.
                sys.stdout.write('Parsed %06s\r' % count)
                sys.stdout.flush()
        self.finish()
class UpdLDIF(BaseParser, LDIFParser):
    """Parser building ``LdapEntity`` objects from an LDIF stream.

    ``handle()`` records one entity per entry whose DN contains
    ``supannCodeEntite=``; ``parse()`` runs the inherited ``parse()`` and then
    ``finish()``.
    """

    def __init__(self, input):
        LDIFParser.__init__(self, input)
        self.entities = {}

    def parse(self):
        """Run the inherited ``parse()`` and then link the entities."""
        super(UpdLDIF, self).parse()
        self.finish()

    def handle(self, dn, entry):
        """Record the entity described by one LDIF entry.

        Entries with an empty DN, or whose DN does not contain
        ``supannCodeEntite=``, are ignored.

        Raises ``ValueError`` when the same entity code is seen twice.
        """
        if not dn or 'supannCodeEntite=' not in dn:
            return
        parents = [parent.decode('utf-8').lower()
                   for parent in entry.get('supannCodeEntiteParent', [])]
        # Decode before lower-casing so that the dictionary key is built
        # exactly like the parent codes above and can be found by finish().
        code = entry.get('supannCodeEntite')[0].decode('utf-8').lower()
        type_ = entry.get('updEntiteType')[0]
        if code in self.entities:
            # String exceptions are rejected by Python 2.6+; raise a real
            # exception carrying the offending code instead.
            raise ValueError('double entity: %r' % code)
        name = ''.join(entry.get('ou') or [])
        description = ''.join(entry.get('description') or [])
        self.entities[code] = LdapEntity(code=code,
                                         name=name.decode('utf-8'),
                                         parents=parents,
                                         description=description.decode('utf-8'),
                                         type_=type_)
        count = len(self.entities)
        if count % 100 == 0:
            # Progress line; the trailing '\r' lets the next one overwrite it.
            sys.stdout.write('Parsed %06s\r' % count)
            sys.stdout.flush()
class Command(BaseCommand):
    '''
    Reload the list of entities.

    Each file given on the command line is parsed (LDIF by default, XML with
    ``--xml``) and the resulting tree is written to ``Entity`` rows: existing
    rows are reused when possible, missing ones are created and rows that are
    no longer referenced are marked inactive.  With ``--test`` the
    transaction is rolled back at the end.
    '''
    can_import_django_settings = True
    requires_model_validation = True
    args = ''
    help = 'Recharge la liste des entites'
    option_list = BaseCommand.option_list + (
        make_option("--verbose", action="store_true"),
        make_option("--xml", action="store_true"),
        make_option("--test", action="store_true"),)

    def build_parents_relations(self, relations, entity, current=None,
                                direct=True, depth=0, path=None):
        '''
        Walk up every chain of parents starting at ``entity``.

        Adds a ``(ancestor, entity, direct)`` tuple to ``relations`` for each
        ancestor met, with ``direct`` being True only for immediate parents,
        and raises ``entity.depth`` to the length of the longest chain walked.

        Raises ``Exception`` when a code is met twice on the same chain.
        '''
        if current is None:
            current = entity
        if path is None:
            # A fresh list per call instead of a shared mutable default.
            path = []
        entity.depth = max(entity.depth, depth)
        if current.code in path:
            raise Exception('loop detected: %s' % ([current.code] + path))
        for parent_entity in current.parents:
            relations.add((parent_entity, entity, direct))
            self.build_parents_relations(relations, entity,
                                         current=parent_entity, direct=False,
                                         depth=depth + 1,
                                         path=[current.code] + path)

    def allocate_instances(self, entity, left_bound, instances, path,
                           depth=0, parent=None):
        '''
        Allocate an ``Entity`` row for ``entity`` and, recursively, for each
        of its children.

        ``path`` is the tuple of codes from the root down to ``entity``.  An
        old row is looked up by full path first, then by the last two codes
        of the path; it is reused unless already taken.  Otherwise a new
        unsaved ``Entity`` is built.  The row is appended to ``instances``
        before its children are processed.

        Returns the ``right_bound`` given to the row.
        '''
        instance = self.old_instances.get(path)
        if instance is None:
            # Fall back on the last two codes of the path.  The result of
            # this lookup used to be discarded, so the fallback never
            # applied.
            instance = self.old_instances_by_last_hop.get(path[-2:])
        if instance is not None and not instance.reused:
            old_dict = instance.__dict__.copy()
            instance.reused = True
            instance.left_bound = left_bound
            # A row found through the last-hop fallback may now sit at a
            # different depth; build_old_instances() orders rows by depth.
            instance.depth = depth
            instance.entity_type = self.entity_types[entity.type_]
            instance.parent = parent
            self.reused += 1
            # The snapshot was taken while ``reused`` was still False, so the
            # flag flip alone already makes the two dictionaries differ.
            if old_dict != instance.__dict__:
                instance.updated = True
                self.update += 1
            sys.stdout.write('Reused %06d instances\r' % self.reused)
            sys.stdout.flush()
        else:
            instance = Entity(code=entity.code, name=entity.name,
                              description=entity.description, depth=depth,
                              left_bound=left_bound,
                              entity_type=self.entity_types[entity.type_],
                              parent=parent)
        instances.append(instance)
        right_bound = left_bound
        for child in entity.children:
            right_bound = self.allocate_instances(
                child, right_bound + 1, instances, path + (child.code,),
                depth=depth + 1, parent=instance)
        instance.right_bound = right_bound + 1
        return instance.right_bound

    def build_old_instances(self):
        '''
        Index the ``Entity`` rows already in the database.

        Fills ``self.old_instances`` (keyed by the tuple of codes from the
        root) and ``self.old_instances_by_last_hop`` (keyed by the last two
        codes of that tuple), records the bounds of the current root in
        ``self.old_left_bound`` / ``self.old_right_bound`` and resets the
        ``self.reused`` and ``self.update`` counters.  When the table is
        empty the bounds keep their initial values of -1 and 0.
        '''
        self.old_left_bound = -1
        self.old_right_bound = 0
        self.old_instances = {}
        self.old_instances_by_last_hop = {}
        self.reused = 0
        self.update = 0
        if not Entity.objects.all().exists():
            return
        paths = {}
        root = Entity.objects.get(parent__isnull=True)
        self.old_left_bound = root.left_bound
        self.old_right_bound = root.right_bound
        # Ordering by depth makes every parent show up before its children,
        # so paths[entity.parent] is already known when a child is reached.
        queryset = Entity.objects.order_by('depth', 'left_bound')
        for entity in queryset.select_related('parent'):
            entity.reused = False
            if entity.parent:
                path = paths[entity.parent] + (entity.code,)
            else:
                path = (entity.code,)
            paths[entity] = entity.path = path
            self.old_instances[path] = entity
            self.old_instances_by_last_hop[tuple(path[-2:])] = entity

    def allocate_entity_types(self):
        '''
        Fill ``self.entity_types`` with one ``EntityType`` (fetched or
        created) per distinct non-empty ``type_`` among the parsed entities.
        '''
        self.entity_types = {}
        type_set = set()
        for entity in self.parser.entities.values():
            if entity.type_:
                type_set.add(entity.type_)
        for type_ in type_set:
            self.entity_types[type_], created = \
                EntityType.objects.get_or_create(name=type_,
                                                 description=type_)

    @transaction.commit_on_success
    def load_file(self, filepath, **options):
        '''
        Parse ``filepath`` and synchronise the ``Entity`` table with it.

        ``options['xml']`` selects the XML parser instead of the LDIF one;
        ``options['test']`` rolls the transaction back once everything has
        run.

        Raises ``AssertionError`` when the parsed data has no root or more
        than one root.
        '''
        if options['test']:
            print('Test mode, all operations will be rolled back at the end')
        parser_class = UpdXML if options['xml'] else UpdLDIF
        # Both parsers consume their input inside parse(), so the file can be
        # closed as soon as parsing is over, even on error.
        with open(filepath) as source:
            parser = parser_class(source)
            parser.parse()
        self.parser = parser
        self.allocate_entity_types()
        # The relations themselves are not used afterwards; the walk is kept
        # because it computes the depths and detects loops.
        relations = set()
        for entity in self.parser.entities.values():
            self.build_parents_relations(relations, entity)
        potential_roots = [entity for entity in parser.entities.values()
                           if not entity.parents]
        # Raised explicitly so that the checks survive ``python -O``.
        if len(potential_roots) > 1:
            raise AssertionError("warning: multiple roots")
        if not potential_roots:
            raise AssertionError("warning: no root")
        root = potential_roots[0]
        instances = []
        self.build_old_instances()
        print('Old bounds [%s, %s]' % (self.old_left_bound,
                                       self.old_right_bound))
        # Number the new tree from 0 when the old one starts high enough,
        # otherwise right after the old one.
        # NOTE(review): the threshold counts parsed entities, while an
        # entity with several parents is allocated once per path -- confirm
        # that the estimate is large enough.
        needed = 2 * (len(parser.entities) + Entity.objects.all().count()) - 1
        if self.old_left_bound > needed:
            start_bound = 0
        else:
            start_bound = self.old_right_bound + 1
        end_bound = self.allocate_instances(root, start_bound, instances,
                                            (root.code,))
        print('')
        left_bounds = [i.left_bound for i in instances
                       if not i.id or i.reused]
        assert left_bounds == sorted(left_bounds)
        print('New bounds [%s, %s]' % (start_bound, end_bound))
        to_disable = [old for old in self.old_instances.values()
                      if bool(old.id) and not old.reused]
        print('%d entities to disable.' % len(to_disable))

        # Rows are inserted in slices of 20 (the original file carries a
        # "work around sqlite limitation" remark, presumably about this
        # batching -- TODO confirm).
        # NOTE(review): new rows were built with ``parent`` pointing at an
        # instance that may itself still be unsaved; confirm that the parent
        # foreign key ends up populated for such rows.
        to_create = [new for new in instances if not new.id]
        added = 0
        for sl in slice_collection(to_create, 20):
            Entity.objects.bulk_create(sl)
            added += len(sl)
            sys.stdout.write('Added %06s entities\r' % added)
            sys.stdout.flush()
        print('')

        updated = 0
        for entity in instances:
            if not (bool(entity.id) and getattr(entity, 'reused', False)
                    and getattr(entity, 'updated', False)):
                continue
            entity.save()
            updated += 1
            sys.stdout.write('Updated %06s entities\r' % updated)
            sys.stdout.flush()
        print('')

        # Rows that were not reused are parked on their own two-wide
        # intervals after the new tree and flagged inactive.
        for index, entity in enumerate(to_disable):
            entity.left_bound = end_bound + index * 2 + 1
            entity.right_bound = end_bound + index * 2 + 2
            entity.is_active = False
            entity.save()
            sys.stdout.write('Disabled %06s entities\r' % (index + 1))
            sys.stdout.flush()
        print('')
        if options['test']:
            print('Test mode, rolling back all operations...')
            transaction.rollback()
        print('Done.')

    def handle(self, *args, **options):
        '''Load each file path given as positional argument, in order.'''
        for filepath in args:
            self.load_file(filepath, **options)