From b610944c51d0c08f41cdb6ee130d763550337449 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9ters?=
Date: Tue, 20 Mar 2018 10:56:05 +0100
Subject: [PATCH] doc: add support for crosslinks

Rewrite links between Mallard pages so they point at the matching
page: for every relative, single-segment href, look up the TextCell
whose slug is 'mallard-<page id>' and replace the href with the online
URL of that cell's page.

Links are left untouched when they carry a scheme or a host, when their
path is empty or contains a '/', or when no matching TextCell exists.
Anchors without an href attribute are skipped.
---
Review notes (ignored by git am):
- Line breaks and indentation were reconstructed; the 4-space indent
  inside the "for cell in ..." loop is assumed from the hunk header.
- The blob ids on the "index" line are carried over from the original
  patch and do not reflect the reviewed content.

 doc-publik/update-publik-doc-from-mallard.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/doc-publik/update-publik-doc-from-mallard.py b/doc-publik/update-publik-doc-from-mallard.py
index df01a55..2bfc08e 100644
--- a/doc-publik/update-publik-doc-from-mallard.py
+++ b/doc-publik/update-publik-doc-from-mallard.py
@@ -1,3 +1,4 @@
+import urlparse
 from bs4 import BeautifulSoup
 import html5lib
 import requests
@@ -15,6 +16,22 @@ for cell in TextCell.objects.filter(slug__startswith='mallard-'):
     document = BeautifulSoup(resp.content, 'html5lib')
     content = document.find('div', 'body')
     more_info = document.find('div', 'sect sect-links')
+    # href=True: anchors without an href have nothing to rewrite
+    for a in document.find_all('a', href=True):
+        href = a.attrs['href']
+        parsed = urlparse.urlparse(href)
+        if parsed.scheme or parsed.netloc:
+            # external link (http://..., mailto:...), keep as is
+            continue
+        if not parsed.path or '/' in parsed.path:
+            # in-page anchor, or a path that is not a bare page name
+            continue
+        try:
+            target_cell = TextCell.objects.get(slug='mallard-%s' % parsed.path.replace('.html', ''))
+        except TextCell.DoesNotExist:
+            continue
+        a.attrs['href'] = target_cell.page.get_online_url()
+
     new_content = unicode(content).replace(unicode(more_info), '')
     cell.text = new_content
     cell.save()