73 lines
2.3 KiB
Python
73 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
# logtracker
|
|
# Copyright (c) 2020 Entr'ouvert
|
|
import re
|
|
|
|
# single-line field: a name made of uppercase ASCII letters, digits and
# underscores, then '=', then the rest of the line; groups are (name, value)
field_pattern = re.compile(r'^([A-Z0-9_]+)=(.*)$')
|
|
# multi-line field: a name (uppercase letter or underscore followed by at
# least one more uppercase letter, digit or underscore) alone on the first
# line, then everything after the newline -- further newlines included --
# captured as the value; groups are (name, value)
field_multiline_pattern = re.compile(r'^([A-Z_][A-Z0-9_]+)\n([\w\W]*)$')
|
|
|
|
|
|
def handle_journal_upload_stream(journal_stream, debug=False):
    """Yield decoded text lines from a journal upload stream.

    Lines are read with ``journal_stream.readline()``, decoded as UTF-8
    (undecodable bytes are replaced) and stripped of their trailing '\\n'.

    Lines that end with '\\r' are treated as framing rather than data
    (presumably HTTP chunked transfer encoding -- confirm against the
    caller):

    * an empty line is skipped;
    * a line equal to '0' ends the generator;
    * a short hexadecimal token (fewer than 6 characters) is taken for a
      chunk size and ignored;
    * anything else is the beginning of a data line that was cut by the
      framing; it is kept and prepended to the next data line.

    Lines that do not end with '\\r' are yielded, empty lines included.

    NOTE: the loop does not detect end of file.  If ``readline()`` keeps
    returning ``b''`` (as file-like objects do at EOF), empty strings are
    yielded indefinitely; the consumer is expected to stop iterating.

    :param journal_stream: object whose ``readline()`` returns bytes
    :param debug: when true, print every ignored chunk-size line
    """
    hex_digits = '0123456789abcdefABCDEF'
    tail = ''
    while True:
        raw = journal_stream.readline()
        line = raw.decode('utf-8', errors='replace').rstrip('\n')

        if not line.endswith('\r'):
            # plain data line: glue it to the fragment left by the
            # previous chunk, if any
            if tail:
                line = tail + line
                tail = ''
            yield line
            continue

        line = line.rstrip('\r')
        if not line:
            continue
        if line == '0':
            # terminating marker: end of stream
            break

        # A chunk size is a short hexadecimal number.  Checking only the
        # first character would also discard short data fragments such as
        # 'foo', so every character must be a hex digit.
        looks_like_chunk_size = (
            len(line) < 6
            and (line[0].islower() or line[0].isdigit())
            and all(char in hex_digits for char in line)
        )
        if looks_like_chunk_size:
            if debug:
                print('ignore ff7c fff4 3d9a etc.: %s' % line)
        else:
            # data line interrupted by the framing: keep it for later
            tail = tail + line
|
|
|
|
|
|
def get_journal_entries(journal_stream, debug=False):
    """Yield journal entries parsed from a journal upload stream.

    Each entry is a list of ``(key, value)`` tuples, in the order the
    fields were read.  Lines come from ``handle_journal_upload_stream``.

    * A line matching ``field_pattern`` is a single-line field; its value
      is converted to ``int`` when it consists only of digits.
    * Any other non-empty line is accumulated (followed by '\\n') as
      multi-line content.  The next empty line closes it: the accumulated
      text is matched against ``field_multiline_pattern`` and stored, or
      dropped when it does not match.
    * An empty line with no pending multi-line content ends the current
      entry, which is then yielded.
    * A line starting with ``__CURSOR`` also ends the current entry when
      one is in progress.

    Iteration stops once three empty lines have been counted while no
    entry was in progress (the counter is reset each time an entry is
    yielded), or when the underlying line generator ends.  Fields still
    pending at that point are not yielded.

    :param journal_stream: object whose ``readline()`` returns bytes
    :param debug: when true, print dropped content and ignored lines
    """
    store = []
    multiline_field = ''
    emptylines_count = 0
    # forward debug so that the framing lines ignored upstream are traced too
    for line in handle_journal_upload_stream(journal_stream, debug=debug):
        if not line:
            if multiline_field:
                # an empty line closes the pending multi-line field
                match = field_multiline_pattern.match(multiline_field)
                if match:
                    k, v = match.groups()
                    store.append((k, v))
                elif debug:
                    print('content dropped: %s' % multiline_field)
                multiline_field = ''
            elif store:
                # an empty line closes the current entry
                yield store
                store = []
                emptylines_count = 0
            else:
                emptylines_count += 1
                if emptylines_count >= 3:
                    # disconnect broken stream
                    break
            continue

        if line.startswith('__CURSOR') and store:
            # sometimes a newline ends a multiline field + ends an entry
            yield store
            store = []

        # jsonb rejects u0000
        line = line.replace('\u0000', '')
        match = field_pattern.match(line)
        if match:
            k, v = match.groups()
            if v.isdigit():
                try:
                    v = int(v)
                except ValueError:
                    # isdigit() accepts characters such as superscript
                    # digits that int() rejects: keep the value as text
                    pass
            store.append((k, v))
        else:
            multiline_field = multiline_field + line + '\n'
|