74 lines
2.2 KiB
Python
74 lines
2.2 KiB
Python
#! /usr/bin/env python
|
|
|
|
import os
|
|
import subprocess
|
|
import re
|
|
import sys
|
|
from lxml import etree
|
|
from lxml.html import html_parser
|
|
|
|
|
|
# Build ``base-theme.html`` from a live page of www.hautes-alpes.fr: download
# the page with wget, strip the page-specific parts, and insert
# ``{% block ... %}`` template placeholders at fixed spots in the markup.

# Scratch file that wget downloads the reference page into.
FILENAME = os.path.join('tmp.html')

# Remove any copy left over from a previous run before downloading again.
if os.path.exists(FILENAME):
    os.unlink(FILENAME)

URL = 'https://www.hautes-alpes.fr/5448-services-en-ligne.htm'
# The wget exit status is deliberately not checked: a failed download is
# caught by the size check just below.
subprocess.call(['wget', '--quiet', '-O', FILENAME, '--convert-links', URL])

# Read raw bytes so the HTML parser detects the page encoding itself instead
# of relying on the locale's default text encoding; the handle is closed as
# soon as the read is done.
with open(FILENAME, 'rb') as downloaded:
    content = downloaded.read().strip()

# A download this small is treated as unusable: stop quietly (exit status 0)
# without writing a new base-theme.html.
if len(content) < 500:
    sys.exit(0)

root = etree.fromstring(content, parser=html_parser)

# remove all google related tags (inline scripts mentioning "google" as well
# as external scripts whose src contains it)
for script in root.xpath('//script'):
    mentions_google_inline = script.text and 'google' in script.text
    if mentions_google_inline or 'google' in script.attrib.get('src', ''):
        script.getparent().remove(script)

# remove the title, the "ariane" nav (breadcrumb) and the "cookie_cnil" div
for element in ('title', 'nav[@id="ariane"]', 'div[@id="cookie_cnil"]'):
    for useless in root.xpath('//%s' % element):
        useless.getparent().remove(useless)

# clear "corps" container to put content into it
for main in root.xpath('//main[@id="corps"]'):
    # clear() also drops the element's attributes, so the id is restored
    # right after; the string replacement further down relies on it.
    main.clear()
    main.attrib['id'] = 'corps'

# tostring() returns a byte string (ASCII by default, with non-ASCII
# characters written as character references). Decode it so the text
# replacements below work on Python 3 as well as Python 2.
content = etree.tostring(root, method='html', pretty_print=True).decode('ascii')

# remove all references to downloaded temporary file
content = content.replace(FILENAME, '')

# remove copy of jquery (and of the formCtrl script); each pattern consumes
# the rest of the line it matches on
content = re.sub(r'<script( type=.*)? src=.*jquery.min.js.*', '', content)
content = re.sub(r'<script( type=.*)? src=.*formCtrl.*.js.*', '', content)

# Insert the template placeholders. The <title> replacement on </head> runs
# before the <head> one; the two targets do not overlap.
content = content.replace('<body', r'<body {% block bodyargs %}{% endblock %} ')
content = content.replace('</head>', '<title>{% block global_title %}{% endblock %}</title>\n</head>')
content = content.replace('<head>', '<head>{% block head %}{% endblock %}')
content = content.replace(
    '<main id="corps">',
    '''<main id="corps">
{% block nav %}{% endblock %}
{% block messages %}{% endblock %}
{% block local-content-wrapper %}
{% endblock %}''',
)
content = content.replace('</body>', '{% block local-body-bottom %}{% endblock %}</body>')

# Point the page's SERVER_ROOT assignment at the original site rather than
# at "/".
content = content.replace("SERVER_ROOT = '/'", 'SERVER_ROOT="//www.hautes-alpes.fr/"')
content = content.replace(
    '<div id="toolbarTraduction">', '<div id="toolbarTraduction">{% block user-info %}{% endblock %}'
)

# Write the finished template, closing the file explicitly so the data is
# flushed before the script ends.
with open(os.path.join('.', 'base-theme.html'), 'w') as theme_file:
    theme_file.write(content)
|