docbow/docbow_project/pfwb/management/commands/archive2.py

from __future__ import print_function
import shutil
import sys
import os
import os.path
import datetime as dt
from django.core.management.base import BaseCommand
from django.db import transaction
from django.core import serializers
from django.utils.encoding import force_str
from django.utils.timezone import now
from django_journal.models import Journal
from docbow_project.docbow import models


def batch(qs, window):
    qs = qs.order_by('id')
    if not qs.exists():
        return
    after = qs[0].id
    while qs.filter(id__gte=after).exists():
        yield qs.filter(id__gte=after, id__lt=after + window)
        after += window


window = 1000
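# Illustrative note on the helper above: with window = 1000,
# batch(models.Document.objects.all(), window) yields querysets covering the
# id ranges [first, first + 1000), [first + 1000, first + 2000), ... until no
# higher id remains; windows that happen to contain no rows simply yield
# empty querysets.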


class Command(BaseCommand):
    args = '<directory> <days of retention>'
    help = 'Archive documents and journal'

    def add_arguments(self, parser):
        parser.add_argument('directory', type=str)
        parser.add_argument('days_of_retention', type=int)

    def save_documents(self):
        # Serialize each document and its attached files under
        # <archive>/doc/<document id>/ before they are deleted.
        qs = models.Document.objects.filter(date__lte=self.before)
        i = 0
        if not qs.exists():
            return
        for documents in batch(qs, window):
            for document in documents:
                doc_path = os.path.join(self.doc_path, str(document.id))
                os.mkdir(doc_path)
                json_path = os.path.join(doc_path, 'document.json')
                with open(json_path, 'w') as document_json:
                    document_json.write(
                        serializers.serialize('json', [document], indent=2, use_natural_foreign_keys=True)
                    )
                for attached_file in document.attached_files.all():
                    file_path = os.path.join(doc_path, os.path.basename(attached_file.content.name))
                    with open(file_path, 'wb') as data_file:
                        data_file.write(attached_file.content.read())
                    attached_file.content.close()
                    attached_file_path = os.path.join(doc_path, 'attached_file_%s.json' % attached_file.id)
                    with open(attached_file_path, 'w') as json_file:
                        json_file.write(
                            serializers.serialize(
                                'json', [attached_file], indent=2, use_natural_foreign_keys=True
                            )
                        )
            i += len(documents)
            print(' - Archived %10d documents' % i, '\r', end='')
            sys.stdout.flush()
        print('')
        i = 0
        for b in batch(qs, window):
            i += b.count()
            b.delete()
            print(' - Deleted %10d documents' % i, '\r', end='')
            sys.stdout.flush()
        print('')

    def save_journal(self):
        # Dump journal entries to <archive>/journal.txt, one entry per line
        # (continuation lines of multi-line entries are indented by one space),
        # then delete them.
        journals = (
            Journal.objects.filter(time__lte=self.before).order_by('id').select_related('tag', 'template')
        )  # FIXME in django 1.11
        # .prefetch_related('objectdata_set__content_type',
        #                   'stringdata_set', 'objectdata_set__tag',
        #                   'stringdata_set__tag', 'objectdata_set__content_object')
        if not journals.exists():
            return
        journal_path = os.path.join(self.path, 'journal.txt')
        i = 0
        with open(journal_path, 'w') as journal_file:
            for b in batch(journals, window):
                for journal in b:
                    journal_file.write('%s %s\n' % (journal.time, force_str(journal).replace('\n', '\n ')))
                i += b.count()
                print(' - Archived %10d lines\r' % i, end='')
                sys.stdout.flush()
        print('')
        i = 0
        for b in batch(journals, window):
            i += b.count()
            b.delete()
            print(' - Deleted %10d lines\r' % i, end='')
            sys.stdout.flush()
        print('')

    @transaction.atomic
    def handle(self, *args, **options):
        self.directory = options['directory']
        self.days = options['days_of_retention']
        self.before = now() - dt.timedelta(days=self.days)
        print('Archive journal and documents older than', self.before, ':')
        self.path = os.path.join(self.directory, now().isoformat())
        self.doc_path = os.path.join(self.path, 'doc')
        os.mkdir(self.path)
        os.mkdir(self.doc_path)
        try:
            self.save_documents()
            self.save_journal()
        except:
            # On any failure, drop the partially written archive and re-raise
            # so the surrounding transaction is rolled back.
            shutil.rmtree(self.path)
            raise
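

# Usage sketch (the destination directory and retention period below are
# examples, not defaults):
#
#   python manage.py archive2 /var/backups/docbow 365
#
# Documents and journal entries older than <days_of_retention> days are
# serialized under <directory>/<ISO timestamp>/ (documents under doc/<id>/,
# the journal in journal.txt) and then deleted from the database; the run is
# wrapped in a single transaction and the archive directory is removed again
# if anything fails.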