forked from entrouvert/publik-base-theme
83 lines
2.2 KiB
Python
83 lines
2.2 KiB
Python
#! /usr/bin/env python
|
|
|
|
import os
|
|
import subprocess
|
|
import re
|
|
import sys
|
|
from lxml import etree
|
|
from lxml.html import html_parser
|
|
|
|
# Scratch file that wget writes the downloaded page into.
FILENAME = 'tmp.html'

# Delete any copy left behind by a previous run before downloading again.
if os.path.exists(FILENAME):
    os.unlink(FILENAME)

# Page that is downloaded and used as the source markup.
URL = 'https://www.lametro.fr/501-plateforme-de-services.htm'
subprocess.call(['wget', '--quiet', '-O', FILENAME, '--convert-links', URL])

# Read the raw bytes inside a context manager: the handle is closed
# deterministically and no locale-dependent decoding can fail here (the HTML
# parser accepts bytes and works out the page encoding by itself).
with open(FILENAME, 'rb') as downloaded:
    content = downloaded.read().strip()

# Fewer than 500 bytes is treated as "nothing usable was downloaded": stop
# quietly with exit status 0 instead of generating a template from it.
if len(content) < 500:
    sys.exit(0)

# Parse the downloaded page with lxml.html's parser.
root = etree.fromstring(content, parser=html_parser)

# remove all google related tags: a <script> is dropped when either its inline
# code or its src attribute mentions "google"
for script in root.xpath('//script'):
    inline_code = script.text or ''
    source_url = script.attrib.get('src', '')
    if ('google' in inline_code) or ('google' in source_url):
        # drop_tree() instead of parent.remove(): remove() also throws away
        # the text that follows the element (its tail), drop_tree() keeps it.
        script.drop_tree()

# remove title and breadcrumb
for element in ('title', 'div[@id="arianeUserTools"]'):
    for useless in root.xpath('//%s' % element):
        useless.drop_tree()

# rename "contenu" container to put content into it; clear() empties the
# element (children, text and attributes), so the new id is set afterwards
for container in root.xpath('//div[@id="contenu"]'):
    container.clear()
    container.attrib['id'] = 'content'

# same treatment for the "eosm-btn" anchor, which becomes an empty element
# with id "publik-mobile-menu"
for menu_button in root.xpath('//a[@id="eosm-btn"]'):
    menu_button.clear()
    menu_button.attrib['id'] = 'publik-mobile-menu'

# Serialise the cleaned tree back to HTML markup.
content = etree.tostring(root, method='html', pretty_print=True)
# On Python 3 tostring() returns bytes, and the str-based substitutions below
# would raise TypeError on it. The default serialisation is ASCII-only
# (non-ASCII characters are written as character references), so decoding is
# lossless. On Python 2 the result already is a str and is left alone.
if not isinstance(content, str):
    content = content.decode('ascii')

# remove all references to downloaded temporary file
content = content.replace(FILENAME, '')

# remove copy of jquery ('.' does not match newlines, so the match runs to
# the end of the line holding the <script> tag)
content = re.sub(r'<script src=.*jquery.min.js.*', '', content)

content = content.replace('<script>document.body.className="withJS"</script>', '')

# Insert template blocks: extra <body> attributes, a <title> block just before
# </head>, and a block right after <head>.
content = content.replace('<body', '<body {% block bodyargs %}{% endblock %} ')

content = content.replace('</head>', '<title>{% block global_title %}{% endblock %}</title>\n</head>')

content = content.replace('<head>', '<head>{% block head %}{% endblock %}')

# The opening tag of the emptied content container is swapped for the top
# links, navigation and content/messages blocks.
# NOTE(review): the opening <div id="content"> is not re-emitted, so the
# container's closing </div> stays in the output without it -- confirm this
# is intended.
content = content.replace(
    '<div id="content">',
    '''
{% if include_top_links != False %}<div id="toplinks">{% block user-info %}{% endblock %}</div>{% endif %}
{% block nav %}{% endblock %}
{% block grenoble-content %}
{% block messages %}
{% endblock %}
{% endblock %}''',
)

# Tracking and bottom-of-body blocks go right before the closing </body>.
content = content.replace(
    '</body>',
    '''
{% block tracking %}
{% endblock %}
{% block body-bottom %}
{% endblock %}
</body>
''',
)

# Write the generated template; the context manager guarantees the file is
# flushed and closed instead of leaving that to garbage collection.
with open('base-theme.html', 'w') as output:
    output.write(content)