mail: rework extract_exim_emails()

This commit is contained in:
Christophe Siraut 2020-09-08 16:03:39 +02:00
parent e4566af515
commit 2bca9792bd
2 changed files with 38 additions and 50 deletions

View File

@ -2,75 +2,64 @@
# Entrouvert 2019
# Exim log parser
# See Summary of Fields in Log Lines in https://www.exim.org/exim-html-current/doc/html/spec_html/ch-log_files.html
import copy
import re
import datetime
import json
import pytz
from django.utils import timezone
from logtracker.journal.models import Entry
from logtracker.mail.models import Mail, Property, Sender, Recipient
paris = pytz.timezone('Europe/Paris')
# Compiled regular expressions used to classify exim main-log lines.
#
# 'ignore': lines carrying one of the listed informational or transient-error
#           phrases; they are recognised only so that they are not reported
#           as parse failures.
# 'match':  lines about a single message. Groups, in order: optional
#           timestamp, message identifier, action flag (<=, =>, ->, ==, **,
#           Completed, ...), and the rest of the line.
#
# Raw strings are used so that regex escapes such as \d and \s reach the regex
# engine verbatim instead of being treated as (invalid) string escapes.
#
# NOTE(review): in 'match' the timestamp group is optional but the space after
# it is not, so a line without a timestamp only matches when it starts with a
# space - confirm this is what the journal MESSAGE field looks like.
patterns = {
    'ignore': re.compile(
        r'([\d-]+) ([\d:]+)? .*(Start queue run|End queue run|daemon started|relay not permitted|Spool file is locked'
        r'|Connection refused|Connection timed out|no immediate delivery|error ignored|Greylisting in action'
        r'|Remote host closed connection|No route to host|SMTP error|SMTP protocol error'
        r'|SMTP protocol synchronization error|SMTP command timeout|no host name found|unexpected disconnection'
        r'|TLS error|log string overflowed|cancelled by timeout|PRDR'
        r'|Key usage violation in certificate has been detected.: delivering unencrypted'
        r'|SIGHUP received: re-exec daemon).*'
    ),
    'match': re.compile(
        r'(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)? ([\w\d\-]+) '
        r'(<=|=>|->|==|\*\*|Completed|SMTP error|Message is frozen|Frozen|Unfrozen)\s*(.*)$'
    ),
}
def parse_date(string):
    """Turn an exim log timestamp into a timezone-aware datetime.

    ``string`` must be formatted as ``YYYY-MM-DD HH:MM:SS``. The parsed naive
    value is made aware using the module-level ``paris`` timezone.
    """
    naive = datetime.datetime.strptime(string, '%Y-%m-%d %H:%M:%S')
    return timezone.make_aware(naive, paris)
def parse_exim_line(line):
    """Parse one exim log line about a single message.

    Returns a dict with the keys ``stamp``, ``identifier``, ``action`` and
    ``data`` (truncated to 511 characters) when the line matches
    ``patterns['match']``. Returns ``None`` when the line does not match, or
    when it is a "queue_smtp_domains" deferral notice. A line that matches
    neither ``patterns['match']`` nor ``patterns['ignore']`` is reported on
    standard output.

    ``stamp`` is ``None`` when the line carries no timestamp.
    """
    match = re.match(patterns['match'], line)
    if match is None:
        # Only complain about lines that are not explicitly known as noise.
        if re.match(patterns['ignore'], line) is None:
            print('Failed to parse line: %s' % line)
        return None

    stamp, identifier, action, data = match.groups()
    if 'domain matches queue_smtp_domains, or -odqs set' in data:
        return None

    # The timestamp group of the pattern is optional, so it may be None;
    # parse_date() would raise TypeError on None.
    if stamp is not None:
        stamp = parse_date(stamp)
    return {'stamp': stamp, 'identifier': identifier, 'action': action, 'data': data[:511]}
def parse_exim_lines(qs, debug=False):
    """Yield the exim message lines found in an iterable of journal entries.

    Each item of ``qs`` must expose a dict-like ``data`` attribute. Entries
    whose ``SYSLOG_IDENTIFIER`` is not ``'exim'``, whose ``MESSAGE`` is not a
    string, or whose message does not match ``patterns['match']`` are skipped.

    Yields ``(entry, identifier, action, data)`` tuples; the timestamp captured
    by the pattern is discarded.

    ``debug`` is accepted for compatibility and is currently unused.
    """
    for entry in qs:
        if entry.data.get('SYSLOG_IDENTIFIER') != 'exim':
            continue
        message = entry.data.get('MESSAGE')
        if not isinstance(message, str):
            # A missing MESSAGE yields None, and re.match() raises TypeError on
            # anything that is not a string; skip instead of aborting the run.
            continue
        match = re.match(patterns['match'], message)
        if match:
            _, identifier, action, data = match.groups()
            yield entry, identifier, action, data
# Build or update Mail records from journal entries whose primary key is
# greater than the 'extracted_index' stored in the 'metadata' Property.
# NOTE(review): this span looks like a rendered diff with indentation stripped;
# it appears to contain both the previous implementation (the json.loads line
# and the first `for` loop) and its replacement (the dict literal and the
# second `for` loop based on parse_exim_lines) - confirm against the repository
# before editing the logic.
def extract_exim_emails():
# Load the bookkeeping Property, creating it on first use.
metadata, _ = Property.objects.get_or_create(name='metadata')
if 'extracted_index' not in metadata.data:
metadata.data = json.loads('{"extracted_index": 0}')
metadata.data = {"extracted_index": 0}
metadata.save()
# Stays None when no entry is iterated, so the `finally` clause can tell
# whether there is a new index to record.
entry = None
try:
# First loop: filters on the '_COMM' field and parses MESSAGE with
# parse_exim_line().
for entry in Entry.objects.filter(pk__gt=metadata.data['extracted_index']):
if entry.data.get('_COMM') in ['exim', 'exim4']:
parsed = parse_exim_line(entry.data.get('MESSAGE'))
if not parsed:
continue
mail, created = Mail.objects.get_or_create(identifier=parsed['identifier'])
if created:
mail.host = entry.host
mail.timestamp = entry.timestamp
# The sender or recipient address is the first space-separated token of
# the data part of the line.
if parsed['action'] == '<=':
address = parsed['data'].split(' ')[0]
sender, _ = Sender.objects.get_or_create(email=address)
mail.sender = sender
elif parsed['action'] in ('=>', '->'):
address, _ = Recipient.objects.get_or_create(email=parsed['data'].split(' ')[0])
mail.recipients.add(address)
elif parsed['action'] == 'Completed':
mail.has_completed = True
elif parsed['action'] == '==' or parsed['action'] == '**':
mail.has_error = True
# Append the entry pk to a deep copy of mail.entries and assign the copy back.
entries = copy.deepcopy(mail.entries)
entries.append(entry.pk)
mail.entries = entries
mail.save()
# Second loop: parse_exim_lines() does the filtering and yields the parsed
# fields directly.
for entry, identifier, action, data in parse_exim_lines(Entry.objects.filter(pk__gt=metadata.data['extracted_index'])):
mail, created = Mail.objects.get_or_create(identifier=identifier)
if created:
mail.host = entry.host
mail.timestamp = entry.timestamp
mail.entries = [entry.pk]
else:
# Appends in place, without the deep copy used in the first loop.
mail.entries.append(entry.pk)
if action == '<=':
address = data.split(' ')[0]
sender, _ = Sender.objects.get_or_create(email=address)
mail.sender = sender
elif action in ('=>', '->'):
address, _ = Recipient.objects.get_or_create(email=data.split(' ')[0])
mail.recipients.add(address)
elif action == 'Completed':
mail.has_completed = True
elif action == '==' or action == '**':
mail.has_error = True
mail.save()
finally:
# Record the pk of the last entry bound by a loop, even when a loop raised.
# NOTE(review): no metadata.save() is visible after this assignment in this
# view; presumably it follows outside the shown lines - confirm.
if entry:
metadata.data['extracted_index'] = entry.pk

View File

@ -11,7 +11,6 @@ def test_mail_api(auth, client):
# Runs the exim extraction, then requests the mail API list endpoint and checks
# that it answers with HTTP 200.
# NOTE(review): `auth` and `client` are presumably pytest fixtures defined
# elsewhere; this view may not show the whole test - confirm.
def test_entries_list(auth, client):
extract_exim_emails()
page = client.get('/api/mail/')
assert page.status_code == 200