21 lines
739 B
Python
21 lines
739 B
Python
from bs4 import BeautifulSoup
|
|
import html5lib
|
|
import requests
|
|
|
|
from combo.data.models import TextCell
|
|
|
|
# Upper bound (in seconds) on each documentation request, so that a stalled
# server cannot block the script forever.
REQUEST_TIMEOUT = 30


def _extract_page_body(html):
    """Return the markup of the page's ``div.body`` without its links section.

    ``html`` is the raw content of a fetched documentation page.  The first
    ``<div class="body">`` is serialised, and the serialisation of the first
    ``<div class="sect sect-links">`` (when the page has one) is removed
    from it.

    Returns ``None`` when the page has no ``div.body``, so that the caller
    can leave the cell untouched instead of storing the text ``"None"``.
    """
    document = BeautifulSoup(html, 'html5lib')
    content = document.find('div', 'body')
    if content is None:
        return None
    # NOTE: ``unicode`` is the Python 2 builtin, as used by the original
    # script.
    new_content = unicode(content)
    more_info = document.find('div', 'sect sect-links')
    # Only strip the links section when it exists: ``unicode(None)`` is
    # u'None', and replacing that would silently delete every literal
    # "None" from the documentation text.
    if more_info is not None:
        new_content = new_content.replace(unicode(more_info), '')
    return new_content


# Refresh the text of every TextCell whose slug is "mallard-<page>" from the
# matching page published on doc.entrouvert.org.
for cell in TextCell.objects.all():
    if not cell.slug.startswith('mallard-'):
        continue
    # Everything after the first dash is the name of the documentation page.
    mallard_page = cell.slug.split('-', 1)[1]
    # Every module is tried (there is deliberately no ``break``, as in the
    # original): when several modules serve the page, the last one wins.
    for module in ('wcs', 'publik-base-theme'):
        resp = requests.get(
            'https://doc.entrouvert.org/%s/dev/%s.html' % (module, mallard_page),
            timeout=REQUEST_TIMEOUT)
        if resp.status_code != 200:
            continue
        new_content = _extract_page_body(resp.content)
        if new_content is None:
            # The page has no usable body: keep the current cell text.
            continue
        cell.text = new_content
        cell.save()
|