From 2bca9792bd9967e6788cfe48a06b79268d937b60 Mon Sep 17 00:00:00 2001 From: Christophe Siraut Date: Tue, 8 Sep 2020 16:03:39 +0200 Subject: [PATCH] mail: rework extract_exim_emails() --- logtracker/mail/utils.py | 87 ++++++++++++++++++---------------------- tests/test_mail.py | 1 - 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/logtracker/mail/utils.py b/logtracker/mail/utils.py index a7f7b17..fe3d3a4 100755 --- a/logtracker/mail/utils.py +++ b/logtracker/mail/utils.py @@ -2,75 +2,64 @@ # Entrouvert 2019 # Exim log parser # See Summary of Fields in Log Lines in https://www.exim.org/exim-html-current/doc/html/spec_html/ch-log_files.html -import copy import re -import datetime -import json import pytz -from django.utils import timezone from logtracker.journal.models import Entry from logtracker.mail.models import Mail, Property, Sender, Recipient paris = pytz.timezone('Europe/Paris') -patterns = {'ignore': re.compile('([\d-]+) ([\d:]+) .*(Start queue run|End queue run|daemon started|relay not permitted|Spool file is locked|Connection refused|Connection timed out|no immediate delivery|error ignored|Greylisting in action|Remote host closed connection|No route to host|SMTP error|SMTP protocol error|SMTP protocol synchronization error|SMTP command timeout|no host name found|unexpected disconnection|TLS error|log string overflowed|cancelled by timeout|PRDR|Key usage violation in certificate has been detected.: delivering unencrypted).*'), - 'match': re.compile('(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d) ([\w\d\-]+) (<=|=>|->|==|\*\*|Completed|SMTP error|Message is frozen|Frozen|Unfrozen)\s*(.*)$'), +patterns = {'ignore': re.compile('([\d-]+) ([\d:]+)? .*(Start queue run|End queue run|daemon started|relay not permitted|Spool file is locked' + '|Connection refused|Connection timed out|no immediate delivery|error ignored|Greylisting in action' + '|Remote host closed connection|No route to host|SMTP error|SMTP protocol error' + '|SMTP protocol synchronization error|SMTP command timeout|no host name found|unexpected disconnection' + '|TLS error|log string overflowed|cancelled by timeout|PRDR' + '|Key usage violation in certificate has been detected.: delivering unencrypted' + '|SIGHUP received: re-exec daemon).*'), + 'match': re.compile('(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)? ([\w\d\-]+) (<=|=>|->|==|\*\*|Completed|SMTP error|Message is frozen|Frozen|Unfrozen)\s*(.*)$'), } - -def parse_date(string): - stamp = datetime.datetime.strptime(string, '%Y-%m-%d %H:%M:%S') - return timezone.make_aware(stamp, paris) - - -def parse_exim_line(line): - match = re.match(patterns['match'], line) - if match: - stamp, identifier, action, data = match.groups() - if 'domain matches queue_smtp_domains, or -odqs set' in data: - return - stamp = parse_date(stamp) - return {'stamp': stamp, 'identifier': identifier, 'action': action, 'data': data[:511]} - else: - match = re.match(patterns['ignore'], line) - if not match: - print('Failed to parse line: %s' % line) +def parse_exim_lines(qs, debug=False): + for entry in qs: + if not entry.data.get('SYSLOG_IDENTIFIER') == 'exim': + continue + message = entry.data.get('MESSAGE') + match = re.match(patterns['match'], message) + if match: + _, identifier, action, data = match.groups() + yield entry, identifier, action, data def extract_exim_emails(): metadata, _ = Property.objects.get_or_create(name='metadata') if 'extracted_index' not in metadata.data: - metadata.data = json.loads('{"extracted_index": 0}') + metadata.data = {"extracted_index": 0} metadata.save() entry = None try: - for entry in Entry.objects.filter(pk__gt=metadata.data['extracted_index']): - if entry.data.get('_COMM') in ['exim', 'exim4']: - parsed = parse_exim_line(entry.data.get('MESSAGE')) - if not parsed: - continue - mail, created = Mail.objects.get_or_create(identifier=parsed['identifier']) - if created: - mail.host = entry.host - mail.timestamp = entry.timestamp - if parsed['action'] == '<=': - address = parsed['data'].split(' ')[0] - sender, _ = Sender.objects.get_or_create(email=address) - mail.sender = sender - elif parsed['action'] in ('=>', '->'): - address, _ = Recipient.objects.get_or_create(email=parsed['data'].split(' ')[0]) - mail.recipients.add(address) - elif parsed['action'] == 'Completed': - mail.has_completed = True - elif parsed['action'] == '==' or parsed['action'] == '**': - mail.has_error = True - entries = copy.deepcopy(mail.entries) - entries.append(entry.pk) - mail.entries = entries - mail.save() + for entry, identifier, action, data in parse_exim_lines(Entry.objects.filter(pk__gt=metadata.data['extracted_index'])): + mail, created = Mail.objects.get_or_create(identifier=identifier) + if created: + mail.host = entry.host + mail.timestamp = entry.timestamp + mail.entries = [entry.pk] + else: + mail.entries.append(entry.pk) + if action == '<=': + address = data.split(' ')[0] + sender, _ = Sender.objects.get_or_create(email=address) + mail.sender = sender + elif action in ('=>', '->'): + address, _ = Recipient.objects.get_or_create(email=data.split(' ')[0]) + mail.recipients.add(address) + elif action == 'Completed': + mail.has_completed = True + elif action == '==' or action == '**': + mail.has_error = True + mail.save() finally: if entry: metadata.data['extracted_index'] = entry.pk diff --git a/tests/test_mail.py b/tests/test_mail.py index 4277658..17321b4 100644 --- a/tests/test_mail.py +++ b/tests/test_mail.py @@ -11,7 +11,6 @@ def test_mail_api(auth, client): def test_entries_list(auth, client): - extract_exim_emails() page = client.get('/api/mail/') assert page.status_code == 200