doc: add support for crosslinks
This commit is contained in:
parent
ea13ac31d0
commit
b610944c51
|
@ -1,3 +1,4 @@
|
||||||
|
import urlparse
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import html5lib
|
import html5lib
|
||||||
import requests
|
import requests
|
||||||
|
@ -15,6 +16,19 @@ for cell in TextCell.objects.filter(slug__startswith='mallard-'):
|
||||||
document = BeautifulSoup(resp.content, 'html5lib')
|
document = BeautifulSoup(resp.content, 'html5lib')
|
||||||
content = document.find('div', 'body')
|
content = document.find('div', 'body')
|
||||||
more_info = document.find('div', 'sect sect-links')
|
more_info = document.find('div', 'sect sect-links')
|
||||||
|
# Rewrite links between documentation pages: when a relative link names
# another imported page, point it at the URL of the page holding that cell.
for a in document.find_all('a'):
    # Anchors without an href (e.g. <a name="...">) are not links; indexing
    # a.attrs['href'] directly would raise KeyError on them.
    href = a.get('href')
    if not href:
        continue
    parsed = urlparse.urlparse(href)
    if parsed.netloc:
        # Link to another host: leave untouched.
        continue
    if not parsed.path or '/' in parsed.path:
        # An empty path is a same-page fragment ("#section"); a path with a
        # slash is not a bare page name. Neither maps to a cell slug.
        continue
    try:
        target_cell = TextCell.objects.get(slug='mallard-%s' % parsed.path.replace('.html', ''))
    except TextCell.DoesNotExist:
        # No cell imported under that slug: keep the original href.
        continue
    a.attrs['href'] = target_cell.page.get_online_url()
|
||||||
|
|
||||||
new_content = unicode(content).replace(unicode(more_info), '')
|
new_content = unicode(content).replace(unicode(more_info), '')
|
||||||
cell.text = new_content
|
cell.text = new_content
|
||||||
cell.save()
|
cell.save()
|
||||||
|
|
Loading…
Reference in New Issue