124 lines
4.4 KiB
Python
124 lines
4.4 KiB
Python
from __future__ import print_function
|
|
import shutil
|
|
import sys
|
|
import os
|
|
import os.path
|
|
import datetime as dt
|
|
|
|
from django.core.management.base import BaseCommand
|
|
from django.db import transaction
|
|
from django.core import serializers
|
|
from django.utils.encoding import force_str
|
|
from django.utils.timezone import now
|
|
from django_journal.models import Journal
|
|
|
|
from docbow_project.docbow import models
|
|
|
|
|
|
def batch(qs, window):
    """Yield sub-querysets of ``qs`` covering consecutive ranges of ``window`` ids.

    The queryset is ordered by ``id`` and walked from its lowest id upwards.
    Each yielded queryset is ``qs`` restricted to ``after <= id < after + window``;
    because ranges are based on id values rather than row counts, a batch may
    hold fewer than ``window`` rows, or none at all when the ids have gaps.

    The "is anything left?" check is re-run against the database before every
    batch, so iteration stops as soon as no row with an id at or above the
    current lower bound remains.

    :param qs: a queryset-like object supporting ``order_by``, ``exists``,
        ``filter(id__gte=..., id__lt=...)`` and indexing.
    :param window: width of each id range; must be a positive integer.
    :raises ValueError: if ``window`` is smaller than 1. As this is a
        generator, the error surfaces when iteration starts.
    """
    # A non-positive window would never move ``after`` past the remaining ids
    # and the loop below would spin forever yielding empty querysets.
    if window < 1:
        raise ValueError('window must be a positive integer, got %r' % (window,))
    qs = qs.order_by('id')
    if not qs.exists():
        return
    after = qs[0].id
    while qs.filter(id__gte=after).exists():
        yield qs.filter(id__gte=after, id__lt=after + window)
        after += window
|
|
|
|
|
|
# Width, in ids, of each range handed to batch() by the archive command.
window = 1000
|
|
|
|
|
|
class Command(BaseCommand):
    """Archive old documents and journal entries to disk, then delete them.

    A timestamped directory is created under the directory given on the
    command line. Documents dated on or before the retention cut-off are
    dumped there (JSON plus a copy of every attached file), journal entries
    timestamped on or before the cut-off are written to ``journal.txt``, and
    the archived rows are then deleted from the database.
    """

    args = '<directory> <days of retention>'
    help = 'Archive documents and journal'

    def add_arguments(self, parser):
        """Declare the positional ``directory`` and ``days_of_retention`` arguments."""
        parser.add_argument('directory', type=str)
        parser.add_argument('days_of_retention', type=int)

    @staticmethod
    def _progress(message):
        """Show ``message`` as an in-place progress line on stdout.

        The line ends with a carriage return instead of a newline so that the
        next progress message overwrites it; callers print an empty line once
        the loop is over to move past it.
        """
        print(message, end='\r')
        sys.stdout.flush()

    def _archive_document(self, document):
        """Write one document and its attached files under ``self.doc_path``.

        Creates ``<doc_path>/<document id>/`` containing ``document.json``,
        a copy of each attached file's content, and one
        ``attached_file_<id>.json`` per attached file.
        """
        doc_path = os.path.join(self.doc_path, str(document.id))
        os.mkdir(doc_path)
        json_path = os.path.join(doc_path, 'document.json')
        with open(json_path, 'w') as document_json:
            document_json.write(
                serializers.serialize('json', [document], indent=2, use_natural_foreign_keys=True)
            )
        for attached_file in document.attached_files.all():
            content = attached_file.content
            file_path = os.path.join(doc_path, os.path.basename(content.name))
            try:
                with open(file_path, 'wb') as data_file:
                    # Stream the content instead of loading the whole
                    # attachment into memory at once.
                    shutil.copyfileobj(content, data_file)
            finally:
                # Release the source file even when the copy fails.
                content.close()
            attached_file_path = os.path.join(doc_path, 'attached_file_%s.json' % attached_file.id)
            with open(attached_file_path, 'w') as json_file:
                json_file.write(
                    serializers.serialize(
                        'json', [attached_file], indent=2, use_natural_foreign_keys=True
                    )
                )

    def save_documents(self):
        """Archive, then delete, every document dated on or before ``self.before``.

        Documents are processed in id ranges of ``window`` to keep each query
        bounded. Nothing is printed or written when no document matches.
        """
        qs = models.Document.objects.filter(date__lte=self.before)
        if not qs.exists():
            return
        i = 0
        for documents in batch(qs, window):
            for document in documents:
                self._archive_document(document)
            # The batch has just been iterated, so len() uses the rows
            # already fetched.
            i += len(documents)
            self._progress(' - Archived %10d documents' % i)
        print('')
        i = 0
        for b in batch(qs, window):
            # Count the batch being deleted, and do it before the rows vanish.
            i += b.count()
            b.delete()
            self._progress(' - Deleted %10d documents' % i)
        print('')

    def save_journal(self):
        """Archive, then delete, every journal entry timestamped on or before ``self.before``.

        Entries are written to ``journal.txt`` in the archive directory, one
        per line as ``<time> <text>``; line breaks inside an entry's text are
        followed by a space so continuation lines stand out from new entries.
        Nothing is written when no entry matches.
        """
        journals = (
            Journal.objects.filter(time__lte=self.before).order_by('id').select_related('tag', 'template')
        )  # FIXME in django 1.11
        # .prefetch_related('objectdata_set__content_type',
        #        'stringdata_set', 'objectdata_set__tag',
        #        'stringdata_set__tag', 'objectdata_set__content_object')
        if not journals.exists():
            return
        journal_path = os.path.join(self.path, 'journal.txt')
        i = 0
        with open(journal_path, 'w') as journal_file:
            for b in batch(journals, window):
                for journal in b:
                    # Terminate every entry with a newline; without it all
                    # entries would run together on a single line.
                    journal_file.write(
                        '%s %s\n' % (journal.time, force_str(journal).replace('\n', '\n '))
                    )
                i += b.count()
                self._progress(' - Archived %10d lines' % i)
        print('')
        i = 0
        for b in batch(journals, window):
            i += b.count()
            b.delete()
            self._progress(' - Deleted %10d lines' % i)
        print('')

    @transaction.atomic
    def handle(self, *args, **options):
        """Run the archival for the given directory and retention period.

        Reads ``directory`` and ``days_of_retention`` from ``options``,
        computes the cut-off date, creates ``<directory>/<ISO timestamp>/doc``
        and archives documents then journal entries into it.

        If anything fails while archiving, the partially written archive
        directory is removed and the exception is re-raised; since the method
        runs inside ``transaction.atomic``, the propagating exception also
        rolls back the deletions.
        """
        self.directory = options['directory']
        self.days = options['days_of_retention']
        self.before = now() - dt.timedelta(days=self.days)
        print('Archive journal and documents older than', self.before, ':')
        self.path = os.path.join(self.directory, now().isoformat())
        self.doc_path = os.path.join(self.path, 'doc')
        os.mkdir(self.path)
        os.mkdir(self.doc_path)
        try:
            self.save_documents()
            self.save_journal()
        except BaseException:
            # Deliberately broad (covers interruption such as Ctrl-C too):
            # never leave a partial archive behind. Always re-raised.
            shutil.rmtree(self.path)
            raise
|