This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
logtracker/logtracker/journal/journalstream.py

73 lines
2.3 KiB
Python

#!/usr/bin/env python3
# logtracker
# Copyright (c) 2020 Entr'ouvert
import re
# Single-line field: a name made of upper-case letters, digits and
# underscores, an '=' sign, then the value (the rest of the line).
field_pattern = re.compile(r'^([A-Z0-9_]+)=(.*)$')
# Multi-line field: a name of at least two characters (first one an upper-case
# letter or underscore) alone on its first line, followed by a newline and
# arbitrary content, which may itself contain newlines.
field_multiline_pattern = re.compile(r'^([A-Z_][A-Z0-9_]+)\n([\w\W]*)$')
def handle_journal_upload_stream(journal_stream, debug=False):
    """Yield decoded journal lines from a binary upload stream.

    Each line is read with ``journal_stream.readline()``, decoded as UTF-8
    (invalid bytes are replaced) and stripped of its trailing newline.

    Lines terminated by a carriage return are treated as transport framing
    rather than journal content (presumably HTTP chunked-transfer framing --
    TODO confirm against the sender):

    * an empty one is skipped;
    * ``'0'`` ends the generator;
    * a short one (fewer than 6 characters) starting with a lower-case letter
      or a digit is ignored (reported on stdout when ``debug`` is true);
    * anything else is buffered and prepended to the next regular line.

    Every other line, including empty ones, is yielded.

    Note: once ``readline()`` starts returning ``b''`` the generator keeps
    yielding empty strings; the caller is responsible for stopping.
    """
    pending = ''
    while True:
        raw = journal_stream.readline()
        text = raw.decode('utf-8', errors='replace').rstrip('\n')

        if not text.endswith('\r'):
            # Regular journal line: glue any buffered fragment in front of it.
            if pending:
                text = pending + text
                pending = ''
            yield text
            continue

        # Framing line (was terminated by CR LF).
        text = text.rstrip('\r')
        if not text:
            continue
        if text == '0':
            return

        first_char = text[0]
        is_size_marker = len(text) < 6 and (first_char.islower() or first_char.isdigit())
        if not is_size_marker:
            # Start of a line that continues after the framing: keep it.
            pending = pending + text
        elif debug:
            print('ignore ff7c fff4 3d9a etc.: %s' % text)
def get_journal_entries(journal_stream, debug=False):
    """Yield journal entries parsed from an upload stream.

    Lines are obtained from ``handle_journal_upload_stream(journal_stream)``.
    Each yielded entry is a list of ``(key, value)`` tuples in the order the
    fields were read. Values made only of decimal digits are converted to
    ``int``; all other values stay ``str``.

    An empty line either closes a pending multi-line field or, when no such
    field is pending, closes the current entry. Three empty lines seen while
    no entry is being built stop the generator (broken stream).

    When ``debug`` is true, dropped content is reported on stdout; the flag
    is also forwarded to ``handle_journal_upload_stream``.
    """
    store = []
    multiline_field = ''
    emptylines_count = 0
    # Forward ``debug`` so ignored framing lines are reported as well.
    for line in handle_journal_upload_stream(journal_stream, debug=debug):
        if not line:
            if multiline_field:
                # An empty line terminates the multi-line field being collected.
                match = field_multiline_pattern.match(multiline_field)
                if match:
                    k, v = match.groups()
                    store.append((k, v))
                elif debug:
                    print('content dropped: %s' % multiline_field)
                multiline_field = ''
            elif store:
                # An empty line with fields collected terminates the entry.
                yield store
                store = []
                emptylines_count = 0
            else:
                emptylines_count += 1
                if emptylines_count >= 3:
                    # disconnect broken stream
                    break
            continue

        if line.startswith('__CURSOR') and store:
            # sometimes a newline ends a multiline field + ends an entry
            yield store
            store = []

        # jsonb rejects u0000
        line = line.replace('\u0000', '')
        match = field_pattern.match(line)
        if match:
            k, v = match.groups()
            # isdecimal() rather than isdigit(): isdigit() is also true for
            # characters such as superscripts, which int() cannot parse and
            # would raise ValueError on.
            if v.isdecimal():
                v = int(v)
            store.append((k, v))
        else:
            # Not a KEY=value line: accumulate it as multi-line field content.
            multiline_field = multiline_field + line + '\n'