manager: export users asynchronously (#43153)

This commit is contained in:
Valentin Deniaud 2021-03-31 12:37:20 +02:00
parent 413604d06b
commit b3bc13d26c
11 changed files with 242 additions and 27 deletions

View File

@ -12,7 +12,7 @@ http-socket = /run/authentic2-multitenant/authentic2-multitenant.sock
chmod-socket = 666
vacuum = true
spooler-processes = 3
spooler-processes = 5
spooler-python-import = authentic2.utils.spooler
master = true

View File

@ -44,8 +44,7 @@ class UserQuerySet(models.QuerySet):
return wrap_qs(self.none())
if '@' in search and len(search.split()) == 1:
with connection.cursor() as cursor:
cursor.execute("SET pg_trgm.similarity_threshold = %f" % app_settings.A2_FTS_THRESHOLD)
self.set_trigram_similarity_threshold()
qs = self.filter(email__icontains=search).order_by(Unaccent('last_name'), Unaccent('first_name'))
if qs.exists():
return wrap_qs(qs)
@ -109,10 +108,7 @@ class UserQuerySet(models.QuerySet):
def find_duplicates(
self, first_name=None, last_name=None, fullname=None, birthdate=None, limit=5, threshold=None
):
with connection.cursor() as cursor:
cursor.execute(
"SET pg_trgm.similarity_threshold = %f" % (threshold or app_settings.A2_DUPLICATES_THRESHOLD)
)
self.set_trigram_similarity_threshold(threshold=threshold or app_settings.A2_DUPLICATES_THRESHOLD)
if fullname is not None:
name = fullname
@ -146,6 +142,12 @@ class UserQuerySet(models.QuerySet):
return qs
def set_trigram_similarity_threshold(self, threshold=None):
    """Issue ``SET pg_trgm.similarity_threshold`` on the database connection.

    :param threshold: value to set; any falsy value (``None``, ``0``) falls
        back to ``app_settings.A2_FTS_THRESHOLD``.
    """
    with connection.cursor() as cursor:
        value = threshold or app_settings.A2_FTS_THRESHOLD
        statement = "SET pg_trgm.similarity_threshold = %f" % value
        cursor.execute(statement)
class UserManager(BaseUserManager):
def _create_user(self, username, email, password, is_staff, is_superuser, **extra_fields):

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@ -264,3 +264,8 @@ form .widget span.select2-container {
.journal-list--timestamp-column {
white-space: pre;
}
/* Activity label: indicator.gif is drawn once at the top right of the span,
   and the right padding leaves room for it next to the text. */
span.activity {
  background: url(indicator.gif) no-repeat top right;
  padding-right: 30px;
}

View File

@ -0,0 +1,45 @@
{% extends "authentic2/manager/base.html" %}
{% load i18n gadjo staticfiles %}
{% block page-title %}{{ block.super }} - {% trans "User export" %}{% endblock %}
{% block appbar %}
  {# Heading is translated like every other user-visible string of this template. #}
  <h2>{% trans "Users export" %}</h2>
{% endblock %}
{% block breadcrumb %}
{{ block.super }}
<a href="{% url 'a2-manager-users' %}">{% trans 'Users' %}</a>
<a href="{% url 'a2-manager-users-export-progress' uuid=uuid %}"></a>
{% endblock %}
{% block main %}
<div class="section">
<div class="running">
<p>{% trans "Preparing CSV export file..." %}</p>
<span class="activity">{% trans "Progress:" %} <span id="progress">0</span>%</span>
</div>
<div class="done">
<p>{% trans "Export completed." %}</p>
<p><a class="button" href="{% url 'a2-manager-users-export-file' uuid=uuid %}">{% trans "Download CSV" %}</a></p>
</div>
</div>
<script>
function updateStatus() {
$('div.done').hide();
$.get('{% url 'a2-manager-users-export-progress' uuid=uuid %}', null,
function (text) {
if(text != 100) {
$('span#progress').text(text);
window.setTimeout(updateStatus, 2500);
} else {
$('div.running').hide();
$('div.done').show();
}
}
);
}
$(document).ready(updateStatus);
</script>
{% endblock %}

View File

@ -38,6 +38,16 @@ urlpatterns = required(
# Authentic2 users
url(r'^users/$', user_views.users, name='a2-manager-users'),
url(r'^users/export/(?P<format>csv)/$', user_views.users_export, name='a2-manager-users-export'),
url(
r'^users/export/(?P<uuid>[a-z0-9-]+)/progress/$',
user_views.users_export_progress,
name='a2-manager-users-export-progress',
),
url(
r'^users/export/(?P<uuid>[a-z0-9-]+)/$',
user_views.users_export_file,
name='a2-manager-users-export-file',
),
url(r'^users/add/$', user_views.user_add_default_ou, name='a2-manager-user-add-default-ou'),
url(r'^users/add/choose-ou/$', user_views.user_add_choose_ou, name='a2-manager-user-add-choose-ou'),
url(r'^users/import/$', user_views.user_imports, name='a2-manager-users-imports'),

View File

@ -16,13 +16,18 @@
import collections
import datetime
import os
import pickle
import uuid
import tablib
from django.contrib.auth import get_user_model
from django.contrib.contenttypes.models import ContentType
from django.core.files.storage import default_storage
from authentic2.manager.resources import UserResource
from authentic2.models import Attribute, AttributeValue
from authentic2.utils import batch_queryset
def get_user_dataset(qs):
@ -72,3 +77,72 @@ def get_user_dataset(qs):
for user in qs:
dataset.append(create_record(user))
return dataset
class UserExport(object):
    """On-disk state of one asynchronous user export.

    Each export owns a directory named after its UUID, located under the
    ``user_exports`` directory of the default storage. That directory holds
    ``export.csv`` (the generated file) and ``progress`` (the completion
    percentage, stored as text).
    """

    def __init__(self, uuid):
        self.uuid = uuid
        self.path = os.path.join(self.base_path(), self.uuid)
        self.export_path = os.path.join(self.path, 'export.csv')
        self.progress_path = os.path.join(self.path, 'progress')

    @classmethod
    def base_path(cls):
        """Return the directory containing all exports, creating it if needed."""
        path = default_storage.path('user_exports')
        # exist_ok avoids a crash when two processes create the directory at
        # the same time (check-then-create is racy).
        os.makedirs(path, exist_ok=True)
        return path

    @property
    def exists(self):
        """True when the directory of this export exists."""
        return os.path.exists(self.path)

    @classmethod
    def new(cls):
        """Create the directory of a new export with a random UUID and return it."""
        export = cls(str(uuid.uuid4()))
        os.makedirs(export.path)
        return export

    @property
    def csv(self):
        """Open the exported CSV for reading; the caller must close the file.

        Raises ``FileNotFoundError`` while the file has not been written yet.
        """
        # Explicit encoding: do not depend on the locale of the process.
        return open(self.export_path, 'r', encoding='utf-8')

    def set_export_content(self, content):
        """Write ``content`` (a string) as the CSV file of this export."""
        with open(self.export_path, 'w', encoding='utf-8') as f:
            f.write(content)

    @property
    def progress(self):
        """Return the stored progress as an int, 0 when none is available."""
        try:
            with open(self.progress_path, 'r') as f:
                content = f.read().strip()
        except FileNotFoundError:
            return 0
        # The file can be read empty while it is being rewritten.
        return int(content) if content else 0

    def set_progress(self, progress):
        """Store ``progress`` as text in the progress file."""
        with open(self.progress_path, 'w') as f:
            f.write(str(progress))
def export_users_to_file(uuid, query):
    """Generate the CSV file of the user export identified by ``uuid``.

    :param uuid: identifier of an export whose directory already exists.
    :param query: query object assigned to a fresh user queryset to select
        the users to export.

    Progress is stored as an integer percentage while users are iterated;
    100 is only stored once the CSV content has been written.
    """
    export = UserExport(uuid)
    qs = get_user_model().objects.all()
    qs.set_trigram_similarity_threshold()
    qs.query = query
    qs = qs.select_related('ou')
    qs = qs.prefetch_related('roles', 'roles__parent_relation__parent')
    # "or 1" avoids a division by zero on an empty queryset.
    count = qs.count() or 1

    def callback(progress):
        # Cap at 99: rounding (or rows added after count()) could otherwise
        # report completion before the file is actually available.
        export.set_progress(min(99, round(progress / count * 100)))

    qs = batch_queryset(qs, progress_callback=callback)
    dataset = get_user_dataset(qs)

    if hasattr(dataset, 'csv'):
        # compatibility for tablib < 0.11
        csv = dataset.csv
    else:
        csv = dataset.export('csv')
    export.set_export_content(csv)
    export.set_progress(100)

View File

@ -17,21 +17,24 @@
import base64
import collections
import operator
import pickle
import sys
from django.contrib import messages
from django.contrib.auth import REDIRECT_FIELD_NAME, get_user_model
from django.core.exceptions import PermissionDenied
from django.core.mail import EmailMultiAlternatives
from django.db import models, transaction
from django.http import FileResponse, Http404
from django.db import connection, models, transaction
from django.http import FileResponse, Http404, HttpResponse
from django.shortcuts import get_object_or_404
from django.template import loader
from django.urls import reverse, reverse_lazy
from django.utils import timezone
from django.utils.functional import cached_property
from django.utils.html import format_html
from django.utils.translation import pgettext_lazy, ugettext
from django.utils.translation import ugettext_lazy as _
from django.views.generic import DetailView, FormView, TemplateView
from django.views.generic import DetailView, FormView, TemplateView, View
from django.views.generic.detail import SingleObjectMixin
from django.views.generic.edit import BaseFormView
@ -39,7 +42,14 @@ from authentic2 import hooks
from authentic2.a2_rbac.utils import get_default_ou
from authentic2.apps.journal.views import JournalViewWithContext
from authentic2.models import Attribute, PasswordReset
from authentic2.utils import make_url, redirect, select_next_url, send_password_reset_mail, switch_user
from authentic2.utils import (
make_url,
redirect,
select_next_url,
send_password_reset_mail,
spooler,
switch_user,
)
from authentic2_idp_oidc.models import OIDCAuthorization, OIDCClient
from django_rbac.utils import get_ou_model, get_role_model, get_role_parenting_model
@ -60,7 +70,7 @@ from .forms import (
from .journal_views import BaseJournalView
from .resources import UserResource
from .tables import OuUserRolesTable, UserAuthorizationsTable, UserRolesTable, UserTable
from .user_export import get_user_dataset
from .user_export import UserExport, get_user_dataset
from .utils import get_ou_count, has_show_username
from .views import (
Action,
@ -501,26 +511,59 @@ class UserEditView(OtherActionsMixin, ActionMixin, BaseEditView):
user_edit = UserEditView.as_view()
class UsersExportView(ExportMixin, UsersView):
class UsersExportView(UsersView):
permissions = ['custom_user.view_user']
resource_class = UserResource
export_prefix = 'users-'
@property
def csv(self):
if hasattr(self._dataset, 'csv'):
# compatiblity for tablib < 0.11
return self._dataset.csv
return self._dataset.export('csv')
def get_dataset(self):
self._dataset = get_user_dataset(self.get_data())
return self
def get(self, request, *args, **kwargs):
export = UserExport.new()
query = self.get_table_data().query
spooler.export_users(uuid=export.uuid, query=query)
return redirect(request, 'a2-manager-users-export-progress', kwargs={'uuid': export.uuid})
users_export = UsersExportView.as_view()
class UsersExportFileView(ExportMixin, PermissionMixin, View):
    """Serve the CSV file of a user export as an attachment.

    Responds with 404 when the export is unknown or when its CSV file has not
    been written yet.
    """

    permissions = ['custom_user.view_user']

    def get(self, request, *args, **kwargs):
        self.export = UserExport(kwargs.get('uuid'))
        if not self.export.exists:
            raise Http404()
        try:
            export_file = self.export.csv
        except FileNotFoundError:
            # The export directory exists but the file is not there yet:
            # answer 404 instead of failing with a server error.
            raise Http404()
        response = HttpResponse(export_file, content_type='text/csv')
        filename = 'users-%s.csv' % timezone.now().strftime('%Y%m%d_%H%M%S')
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        return response
users_export_file = UsersExportFileView.as_view()
class UsersExportProgressView(MediaMixin, TemplateView):
    """Progress page of a user export.

    A regular request renders the progress template; a request carrying the
    ``X-Requested-With: XMLHttpRequest`` header gets the current progress
    value as the response body. Unknown exports yield a 404.
    """

    template_name = 'authentic2/manager/user_export.html'

    def get(self, request, *args, **kwargs):
        self.uuid = kwargs.get('uuid')
        export = UserExport(self.uuid)
        if not export.exists:
            raise Http404()
        # Same test as HttpRequest.is_ajax(), which is deprecated since
        # Django 3.1 and removed in Django 4.0.
        if request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest':
            return HttpResponse(export.progress)
        return super().get(request, *args, **kwargs)

    def get_context_data(self, **kwargs):
        ctx = super().get_context_data(**kwargs)
        # The template needs the uuid to build the progress and download URLs.
        ctx['uuid'] = self.uuid
        return ctx
users_export_progress = UsersExportProgressView.as_view()
class UserChangePasswordView(BaseEditView):
template_name = 'authentic2/manager/form.html'
model = get_user_model()

View File

@ -963,7 +963,7 @@ def batch(iterable, size):
yield chain([batchiter.next()], batchiter)
def batch_queryset(qs, size=1000):
def batch_queryset(qs, size=1000, progress_callback=None):
"""Batch prefetched potentially very large queryset, it's a middle ground
between using .iterator() which cannot be prefetched and prefetching a full
table, which can take a large place in memory.
@ -972,6 +972,8 @@ def batch_queryset(qs, size=1000):
chunk = qs[i * size : (i + 1) * size]
if not chunk:
break
if progress_callback:
progress_callback(i * size)
for row in chunk:
yield row

View File

@ -84,3 +84,10 @@ def tenantspool(func):
return base_spooler(*args, **kwargs)
return spooler
@tenantspool
def export_users(uuid, query):
    """Spooler task generating the user export identified by ``uuid``."""
    # Imported at call time rather than at module level
    # (presumably to avoid an import cycle - TODO confirm).
    from authentic2.manager import user_export

    user_export.export_users_to_file(uuid, query)

View File

@ -339,12 +339,37 @@ def test_export_csv(settings, app, superuser, django_assert_num_queries):
# queries should be batched to keep prefetching working without
# overspending memory for the queryset cache, 4 queries by batches
num_queries = int(4 + 4 * (user_count / DEFAULT_BATCH_SIZE + bool(user_count % DEFAULT_BATCH_SIZE)))
# the export task also performs one query to set the trigram threshold and another to get the user count
num_queries += 2
with django_assert_num_queries(num_queries):
response = response.click('CSV')
url = response.url
response = response.follow()
assert 'Preparing CSV export file...' in response.text
assert '<span id="progress">0</span>' in response.text
response = response.click('Download CSV')
table = list(csv.reader(response.text.splitlines()))
assert len(table) == (user_count + 1)
assert len(table[0]) == (15 + AT_COUNT)
# ajax call returns 100% progress
resp = app.get(url, xhr=True)
assert resp.text == '100'
def test_export_csv_search(settings, app, superuser):
    """The CSV export only contains the users matching the search text."""
    User.objects.bulk_create(User(username='user%s' % i) for i in range(10))

    login(app, superuser)
    resp = app.get('/manage/users/?search-text=user1')
    resp = resp.click('CSV').follow()
    resp = resp.click('Download CSV')
    table = list(csv.reader(resp.text.splitlines()))
    # header row + the two matching users (user1 and superuser)
    assert len(table) == 3
def test_export_csv_disabled_attribute(settings, app, superuser):
attr = Attribute.objects.create(name='attr', label='Attr', kind='string')
@ -359,7 +384,8 @@ def test_export_csv_disabled_attribute(settings, app, superuser):
response = login(app, superuser, reverse('a2-manager-users'))
settings.A2_CACHE_ENABLED = True
response = response.click('CSV')
response = response.click('CSV').follow()
response = response.click('Download CSV')
user_count = User.objects.count()
table = list(csv.reader(response.text.splitlines()))
@ -380,7 +406,8 @@ def test_export_csv_user_delete(settings, app, superuser):
response = login(app, superuser, reverse('a2-manager-users'))
settings.A2_CACHE_ENABLED = True
response = response.click('CSV')
response = response.click('CSV').follow()
response = response.click('Download CSV')
table = list(csv.reader(response.text.splitlines()))
# superuser + ten created users + csv header - three users marked as deleted
assert len(table) == (1 + 10 + 1 - 3)