250 lines
9.6 KiB
Python
250 lines
9.6 KiB
Python
# vim:spell:spelllang=fr
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
import sys
|
|
from optparse import make_option
|
|
import xml.etree.ElementTree as etree
|
|
|
|
from django.core.management.base import BaseCommand
|
|
from django.db import transaction
|
|
|
|
from polynum.base.models import EntityType, Entity
|
|
from polynum.utils import slice_collection
|
|
|
|
from polynum.ldif import LDIFParser
|
|
|
|
|
|
class LdapEntity(object):
    """In-memory record for one entity read from an input file.

    Every keyword argument given to the constructor becomes an instance
    attribute.  Two attributes always exist: ``depth`` (0 unless overridden)
    and ``children`` (a fresh empty list unless overridden).
    """

    def __init__(self, **kwargs):
        self.depth, self.children = 0, []
        # Caller-supplied attributes are applied last so that they win over
        # the defaults set above.
        vars(self).update(kwargs)

    def __unicode__(self):
        """Return ``<LdapEntity: key=value, ...>`` for all instance attributes."""
        rendered = [u'%s=%s' % pair for pair in vars(self).iteritems()]
        return u'<LdapEntity: %s>' % u', '.join(rendered)
|
|
|
|
|
|
class BaseParser(object):
    """Post-processing shared by the entity parsers.

    ``finish`` reads ``self.entities``, a mapping from entity code to entity
    object; this class does not create that attribute itself.
    """

    def finish(self):
        """Link the parsed entities together and print the final count.

        Three passes are made over ``self.entities``:

        1. each entity's ``parents`` list of codes is replaced by the list of
           the corresponding entity objects;
        2. each entity is appended to the ``children`` list of every parent,
           unless it is already there;
        3. every ``children`` list is replaced by a copy sorted by ``code``.

        Raises KeyError when a parent code has no entry in ``self.entities``.
        """
        registry = self.entities

        # Pass 1: parent codes -> parent objects.
        for node in registry.values():
            node.parents = [registry[parent_code] for parent_code in node.parents]

        # Pass 2: register each node with its parents, without duplicates.
        for node in registry.values():
            for ancestor in node.parents:
                if node in ancestor.children:
                    continue
                ancestor.children.append(node)

        # Pass 3: children ordered by code.
        for node in registry.values():
            ordered = list(node.children)
            ordered.sort(key=lambda member: member.code)
            node.children = ordered

        print('Parsed %06s\r' % len(registry))
|
|
|
|
|
|
class UpdXML(BaseParser):
|
|
def __init__(self, input):
|
|
self.input = input
|
|
self.entities = {}
|
|
|
|
def parse(self):
|
|
element_tree = etree.parse(self.input)
|
|
for structure in element_tree.findall('structure'):
|
|
code = structure.get('code').lower()
|
|
libelle_long = structure.find('libelle_long').text
|
|
libelle_court = structure.find('libelle_court').text
|
|
type_ = structure.find('type').text
|
|
pere = structure.findall('pere')
|
|
if pere is not None:
|
|
code_pere = map(lambda p: p.text.lower(), pere)
|
|
code_pere = filter(lambda x: x != 'sri', code_pere)
|
|
self.entities[code] = LdapEntity(code=code,
|
|
name=libelle_court,
|
|
parents=code_pere,
|
|
description=libelle_long,
|
|
type_=type_)
|
|
if len(self.entities) % 100 == 0:
|
|
print 'Parsed %06s\r' % len(self.entities),
|
|
sys.stdout.flush()
|
|
self.finish()
|
|
|
|
|
|
class UpdLDIF(BaseParser, LDIFParser):
|
|
def __init__(self, input):
|
|
LDIFParser.__init__(self, input)
|
|
self.entities = {}
|
|
|
|
def parse(self):
|
|
super(UpdLDIF, self).parse()
|
|
self.finish()
|
|
|
|
def handle(self, dn, entry):
|
|
if not dn:
|
|
return
|
|
if 'supannCodeEntite=' in dn:
|
|
parents = [parent.decode('utf-8').lower() for parent in entry.get('supannCodeEntiteParent', [])]
|
|
code = entry.get('supannCodeEntite')[0].lower()
|
|
type_ = entry.get('updEntiteType')[0]
|
|
if code in self.entities:
|
|
raise "double entity", code
|
|
name = ''.join(entry.get('ou') or [])
|
|
description = ''.join(entry.get('description') or [])
|
|
self.entities[code] = LdapEntity(code=code.decode('utf-8'),
|
|
name=name.decode('utf-8'),
|
|
parents=parents,
|
|
description=description.decode('utf-8'),
|
|
type_=type_)
|
|
if len(self.entities) % 100 == 0:
|
|
print 'Parsed %06s\r' % len(self.entities),
|
|
sys.stdout.flush()
|
|
|
|
|
|
class Command(BaseCommand):
|
|
'''
|
|
Recharge la liste des entites
|
|
'''
|
|
can_import_django_settings = True
|
|
requires_model_validation = True
|
|
args = ''
|
|
help = 'Recharge la liste des entites'
|
|
|
|
option_list = BaseCommand.option_list + (
|
|
make_option("--verbose", action="store_true"),
|
|
make_option("--xml", action="store_true"),
|
|
make_option("--test", action="store_true"),)
|
|
|
|
def build_parents_relations(self, relations, entity, current=None, direct=True, depth=0, path=[]):
|
|
if not current:
|
|
current = entity
|
|
entity.depth = max(entity.depth, depth)
|
|
if current.code in path:
|
|
raise Exception('loop detected: %s' % ([current.code] + path))
|
|
for parent_entity in current.parents:
|
|
relations.add((parent_entity, entity, direct))
|
|
self.build_parents_relations(relations, entity,
|
|
current=parent_entity, direct=False, depth=depth+1, path=[current.code]+path)
|
|
|
|
def allocate_instances(self, entity, left_bound, instances, path, depth=0, parent=None):
|
|
instance = self.old_instances.get(path)
|
|
if instance is None:
|
|
self.old_instances_by_last_hop.get(path[-2:])
|
|
if instance and not instance.reused:
|
|
old_dict = instance.__dict__.copy()
|
|
instance.reused = True
|
|
instance.left_bound=left_bound
|
|
instance.entity_type = self.entity_types[entity.type_]
|
|
instance.parent = parent
|
|
self.reused += 1
|
|
if old_dict != instance.__dict__:
|
|
instance.updated = True
|
|
self.update += 1
|
|
print 'Reused %06d instances\r' % self.reused,
|
|
sys.stdout.flush()
|
|
else:
|
|
instance = Entity(code=entity.code, name=entity.name,
|
|
description=entity.description, depth=depth, left_bound=left_bound,
|
|
entity_type=self.entity_types[entity.type_], parent=parent)
|
|
instances.append(instance)
|
|
right_bound = left_bound
|
|
for child in entity.children:
|
|
right_bound = self.allocate_instances(child, right_bound+1, instances, path + (child.code,), depth=depth+1, parent=instance)
|
|
instance.right_bound = right_bound+1
|
|
return instance.right_bound
|
|
|
|
def build_old_instances(self):
|
|
self.old_left_bound = -1
|
|
self.old_right_bound = 0
|
|
self.old_instances = {}
|
|
self.old_instances_by_last_hop = {}
|
|
self.reused = 0
|
|
self.update = 0
|
|
if not Entity.objects.all().exists():
|
|
return
|
|
path = ()
|
|
previous = None
|
|
paths = {}
|
|
root = Entity.objects.get(parent__isnull=True)
|
|
self.old_left_bound = root.left_bound
|
|
self.old_right_bound = root.right_bound
|
|
for entity in Entity.objects.order_by('depth', 'left_bound').select_related('parent'):
|
|
entity.reused = False
|
|
if entity.parent:
|
|
path = paths[entity.parent] + (entity.code,)
|
|
else:
|
|
path = (entity.code,)
|
|
paths[entity] = entity.path = path
|
|
self.old_instances[path] = entity
|
|
self.old_instances_by_last_hop[tuple(path[-2:])] = entity
|
|
|
|
def allocate_entity_types(self):
|
|
self.entity_types = {}
|
|
type_set = set()
|
|
for entity in self.parser.entities.values():
|
|
if entity.type_:
|
|
type_set.add(entity.type_)
|
|
for type_ in type_set:
|
|
self.entity_types[type_], created = EntityType.objects.get_or_create(name=type_, description=type_)
|
|
|
|
@transaction.commit_on_success
|
|
def load_file(self, filepath, **options):
|
|
if options['test']:
|
|
print 'Test mode, all operations will be rolled back at the end'
|
|
if options['xml']:
|
|
parser = UpdXML(file(filepath))
|
|
parser.parse()
|
|
else:
|
|
parser = UpdLDIF(file(filepath))
|
|
parser.parse()
|
|
self.parser = parser
|
|
self.allocate_entity_types()
|
|
relations = set()
|
|
for entity in self.parser.entities.values():
|
|
self.build_parents_relations(relations, entity)
|
|
# work around sqlite limitation
|
|
potential_roots = [entity for entity in parser.entities.values() if len(entity.parents) == 0]
|
|
assert len(potential_roots) < 2, "warning: multiple roots"
|
|
assert len(potential_roots) > 0, "warning: no root"
|
|
root = potential_roots[0]
|
|
instances = []
|
|
self.build_old_instances()
|
|
print 'Old bounds [%s, %s]' % (self.old_left_bound, self.old_right_bound)
|
|
if self.old_left_bound > 2*(len(parser.entities)+Entity.objects.all().count())-1:
|
|
start_bound = 0
|
|
else:
|
|
start_bound = self.old_right_bound+1
|
|
end_bound = self.allocate_instances(root, start_bound, instances, (root.code,))
|
|
print
|
|
left_bounds = [i.left_bound for i in instances if not i.id or i.reused]
|
|
assert left_bounds == sorted(left_bounds)
|
|
print 'New bounds [%s, %s]' % (start_bound, end_bound)
|
|
print len(filter(lambda x: bool(x.id) and not x.reused, self.old_instances.values())), 'entities to disable.'
|
|
i = 0
|
|
for sl in slice_collection(filter(lambda x: not x.id, instances), 20):
|
|
Entity.objects.bulk_create(sl)
|
|
i += len(sl)
|
|
print 'Added %06s entities\r' % i,
|
|
sys.stdout.flush()
|
|
print
|
|
i = 0
|
|
for entity in filter(lambda x: bool(x.id) and getattr(x, 'reused', False) and getattr(x, 'updated', False), instances):
|
|
entity.save()
|
|
i += 1
|
|
print 'Updated %06s entities\r' % i,
|
|
sys.stdout.flush()
|
|
print
|
|
i = 0
|
|
for entity in filter(lambda x: bool(x.id) and not x.reused, self.old_instances.values()):
|
|
entity.left_bound = end_bound+i*2+1
|
|
entity.right_bound = end_bound+i*2+2
|
|
entity.is_active = False
|
|
entity.save()
|
|
i += 1
|
|
print 'Disabled %06s entities\r' % i,
|
|
sys.stdout.flush()
|
|
print
|
|
if options['test']:
|
|
print 'Test mode, rolling back all operations...'
|
|
transaction.rollback()
|
|
print 'Done.'
|
|
|
|
|
|
def handle(self, *args, **options):
|
|
for filepath in args:
|
|
self.load_file(filepath, **options)
|
|
|