2014-09-17 06:23:21 +02:00
|
|
|
from __future__ import absolute_import, unicode_literals, print_function
|
2015-02-19 21:33:11 +01:00
|
|
|
import codecs
|
|
|
|
import operator
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import warnings
|
2014-04-11 00:46:02 +02:00
|
|
|
try:
|
2014-10-08 19:28:33 +02:00
|
|
|
from collections import OrderedDict
|
2014-08-22 00:13:03 +02:00
|
|
|
except ImportError: # pragma: no cover
|
2014-10-08 19:28:33 +02:00
|
|
|
# some old python 2.6 thing then, eh?
|
|
|
|
from ordereddict import OrderedDict
|
2014-09-17 06:23:21 +02:00
|
|
|
import sys
|
2015-02-14 17:28:33 +01:00
|
|
|
if sys.version_info >= (3,): # pragma: no cover
|
2014-10-08 19:53:22 +02:00
|
|
|
# As in, Python 3
|
2014-09-17 06:23:21 +02:00
|
|
|
from io import StringIO
|
2015-05-08 18:37:52 +02:00
|
|
|
from urllib.parse import urljoin, urlparse
|
2014-09-17 06:23:21 +02:00
|
|
|
STR_TYPE = str
|
|
|
|
else: # Python 2
|
|
|
|
try:
|
2014-09-24 06:43:38 +02:00
|
|
|
from cStringIO import StringIO
|
2014-09-17 06:23:21 +02:00
|
|
|
except ImportError: # pragma: no cover
|
2015-02-19 20:24:26 +01:00
|
|
|
from StringIO import StringIO
|
|
|
|
StringIO = StringIO # shut up pyflakes
|
2015-05-08 18:37:52 +02:00
|
|
|
from urlparse import urljoin, urlparse
|
2016-01-20 02:39:02 +01:00
|
|
|
STR_TYPE = basestring # NOQA
|
2014-12-26 03:44:13 +01:00
|
|
|
|
2014-02-23 12:53:52 +01:00
|
|
|
import cssutils
|
2016-03-15 19:13:53 +01:00
|
|
|
import requests
|
2014-02-23 12:53:52 +01:00
|
|
|
from lxml import etree
|
|
|
|
from lxml.cssselect import CSSSelector
|
2015-02-15 19:16:45 +01:00
|
|
|
from premailer.merge_style import merge_styles, csstext_to_pairs
|
2015-02-15 22:42:29 +01:00
|
|
|
from premailer.cache import function_cache
|
2009-07-15 13:00:27 +02:00
|
|
|
|
2009-08-04 18:35:21 +02:00
|
|
|
# Public API of this module.
__all__ = ['PremailerError', 'Premailer', 'transform']
|
2009-07-15 13:00:27 +02:00
|
|
|
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2009-07-15 13:00:27 +02:00
|
|
|
class PremailerError(Exception):
    """Base exception for errors raised by premailer."""

    pass
|
|
|
|
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2014-08-22 00:13:03 +02:00
|
|
|
class ExternalNotFoundError(ValueError):
    """Raised when an external stylesheet (URL or local path) cannot
    be located.
    """

    pass
|
|
|
|
|
|
|
|
|
2013-10-11 04:38:50 +02:00
|
|
|
def make_important(bulk):
    """Mark every property in a CSS declaration string as ``!important``.

    Declarations that already carry ``!important`` (even with trailing
    whitespace, which the old ``endswith`` check missed) are left as-is,
    and empty segments -- e.g. produced by a trailing semicolon -- are
    dropped instead of becoming a bare ``" !important"``.

    Args:
        bulk (str): semicolon-separated declarations like
            ``'color:red;font-size:1px'``.

    Returns:
        str: the same declarations, each suffixed with ``!important``.
    """
    return ';'.join(
        p if p.rstrip().endswith('!important') else '%s !important' % p
        for p in bulk.split(';')
        if p.strip()
    )
|
2009-07-15 13:00:27 +02:00
|
|
|
|
2013-10-14 02:15:37 +02:00
|
|
|
|
2014-10-17 02:16:28 +02:00
|
|
|
def get_or_create_head(root):
    """Return the document's <head> element, creating and inserting one
    (as the first child of <body>'s parent) when the document has none.
    """
    existing = CSSSelector('head')(root)
    if existing:
        return existing[0]

    head = etree.Element('head')
    body = CSSSelector('body')(root)[0]
    body.getparent().insert(0, head)
    return head
|
|
|
|
|
2015-02-19 20:24:26 +01:00
|
|
|
|
2015-02-15 22:42:29 +01:00
|
|
|
@function_cache()
def _cache_parse_css_string(css_body, validate=True):
    """
    Memoized wrapper around ``cssutils.parseString``.

    Caching is a big win when the number of rules is large. At most
    1000 entries are kept, mainly to guard against runaway memory use
    if something goes wild. Note that the cache can be turned off
    entirely via the Premailer constructor.

    Args:
        css_body(str): css rules in string format
        validate(bool): if cssutils should validate

    Returns:
        cssutils.css.cssstylesheet.CSSStyleSheet
    """
    parsed_sheet = cssutils.parseString(css_body, validate=validate)
    return parsed_sheet
|
2014-10-17 02:16:28 +02:00
|
|
|
|
2015-10-20 11:40:49 +02:00
|
|
|
|
|
|
|
def capitalize_float_margin(css_body):
    """Return ``css_body`` with every ``float`` / ``margin*`` property
    name capitalized (e.g. ``margin-top`` -> ``Margin-top``); all other
    declarations pass through unchanged.
    """
    def _capitalized(match):
        prop = match.group('property').capitalize()
        return '{0}:{1}{2}'.format(
            prop, match.group('value'), match.group('terminator'))

    return _lowercase_margin_float_rule.sub(_capitalized, css_body)
|
|
|
|
|
|
|
|
|
2012-12-13 16:57:52 +01:00
|
|
|
_element_selector_regex = re.compile(r'(^|\s)\w')
|
2013-10-11 13:51:10 +02:00
|
|
|
_cdata_regex = re.compile(r'\<\!\[CDATA\[(.*?)\]\]\>', re.DOTALL)
|
2015-10-20 11:40:49 +02:00
|
|
|
_lowercase_margin_float_rule = re.compile(
|
|
|
|
r'''(?P<property>margin(-(top|bottom|left|right))?|float)
|
|
|
|
:
|
|
|
|
(?P<value>.*?)
|
|
|
|
(?P<terminator>$|;)''',
|
|
|
|
re.IGNORECASE | re.VERBOSE)
|
2011-10-15 08:36:12 +02:00
|
|
|
_importants = re.compile('\s*!important')
|
2015-09-23 04:45:57 +02:00
|
|
|
#: The short (3-digit) color codes that cause issues for IBM Notes
|
|
|
|
_short_color_codes = re.compile(r'^#([0-9a-f])([0-9a-f])([0-9a-f])$', re.I)
|
|
|
|
|
2011-10-15 08:36:12 +02:00
|
|
|
# These selectors don't apply to all elements. Rather, they specify
|
|
|
|
# which elements to apply to.
|
|
|
|
FILTER_PSEUDOSELECTORS = [':last-child', ':first-child', 'nth-child']
|
2009-07-15 13:00:27 +02:00
|
|
|
|
2015-02-19 20:24:26 +01:00
|
|
|
|
2009-07-15 13:00:27 +02:00
|
|
|
class Premailer(object):
    """Turns CSS rules found in (or referenced from) an HTML document
    into inline ``style`` attributes; see :meth:`transform`.
    """

    # HTML attribute inspected on <style>/<link> elements; the value
    # 'ignore' makes premailer skip that element entirely.
    attribute_name = 'data-premailer'
|
|
|
|
|
2010-05-19 21:44:10 +02:00
|
|
|
    def __init__(self, html, base_url=None,
                 preserve_internal_links=False,
                 preserve_inline_attachments=True,
                 exclude_pseudoclasses=True,
                 keep_style_tags=False,
                 include_star_selectors=False,
                 remove_classes=False,
                 capitalize_float_margin=False,
                 strip_important=True,
                 external_styles=None,
                 css_text=None,
                 method="html",
                 base_path=None,
                 disable_basic_attributes=None,
                 disable_validation=False,
                 cache_css_parsing=True,
                 cssutils_logging_handler=None,
                 cssutils_logging_level=None,
                 disable_leftover_css=False,
                 align_floating_images=True,
                 remove_unset_properties=True):
        """Configure a transformation of ``html``.

        :param html: an HTML string, or an already-parsed lxml tree
            (anything with a ``getroottree`` method).
        :param base_url: if set, relative ``href``/``src`` URLs are made
            absolute against it; must include a scheme.
        :param preserve_internal_links: leave ``href="#..."`` links
            untouched when rewriting URLs.
        :param preserve_inline_attachments: leave ``src="cid:..."`` URLs
            untouched when rewriting URLs.
        :param exclude_pseudoclasses: keep pseudoclass rules in a
            <style> tag instead of attempting to inline them.
        :param keep_style_tags: keep the original <style> tags, with
            their original CSS, in the document.
        :param include_star_selectors: process ``*`` selectors instead
            of ignoring them.
        :param remove_classes: strip all ``class`` attributes after
            inlining.
        :param strip_important: drop ``!important`` markers from the
            final output.
        :param external_styles: URL/path (or list of them) of extra
            stylesheets to inline.
        :param css_text: CSS string (or list of strings) to inline.
        :param method: 'html' or 'xml'; controls parsing/serialization.
        :param base_path: local directory used to resolve relative
            stylesheet paths.
        :param disable_basic_attributes: HTML attribute names (e.g.
            'align', 'bgcolor') that must never be set from CSS.
        :param disable_validation: don't have cssutils validate the CSS.
        :param cache_css_parsing: memoize cssutils parse results.
        :param cssutils_logging_handler: optional handler added to the
            cssutils logger.
        :param cssutils_logging_level: optional level set on the
            cssutils logger.
        :param disable_leftover_css: don't write non-inlinable rules
            back into a <head> <style> tag.
        :param capitalize_float_margin: capitalize ``float``/``margin*``
            property names in inlined styles (outlook.com workaround).
        :param align_floating_images: add ``align="left"/"right"`` to
            <img> tags whose style floats them (Outlook workaround).
        :param remove_unset_properties: let ``merge_styles`` drop
            properties whose merged value is unset.
        """
        self.html = html
        self.base_url = base_url
        self.preserve_internal_links = preserve_internal_links
        self.preserve_inline_attachments = preserve_inline_attachments
        self.exclude_pseudoclasses = exclude_pseudoclasses
        # whether to delete the <style> tag once it's been processed
        # this will always preserve the original css
        self.keep_style_tags = keep_style_tags
        self.remove_classes = remove_classes
        self.capitalize_float_margin = capitalize_float_margin
        # whether to process or ignore selectors like '* { foo:bar; }'
        self.include_star_selectors = include_star_selectors
        # a single string is promoted to a one-element list
        if isinstance(external_styles, STR_TYPE):
            external_styles = [external_styles]
        self.external_styles = external_styles
        if isinstance(css_text, STR_TYPE):
            css_text = [css_text]
        self.css_text = css_text
        self.strip_important = strip_important
        self.method = method
        self.base_path = base_path
        # avoid a shared mutable default
        if disable_basic_attributes is None:
            disable_basic_attributes = []
        self.disable_basic_attributes = disable_basic_attributes
        self.disable_validation = disable_validation
        self.cache_css_parsing = cache_css_parsing
        self.disable_leftover_css = disable_leftover_css
        self.align_floating_images = align_floating_images
        self.remove_unset_properties = remove_unset_properties

        # optional wiring of the (global) cssutils logger
        if cssutils_logging_handler:
            cssutils.log.addHandler(cssutils_logging_handler)
        if cssutils_logging_level:
            cssutils.log.setLevel(cssutils_logging_level)
|
|
|
|
|
2015-02-15 22:42:29 +01:00
|
|
|
def _parse_css_string(self, css_body, validate=True):
|
|
|
|
if self.cache_css_parsing:
|
|
|
|
return _cache_parse_css_string(css_body, validate=validate)
|
|
|
|
|
|
|
|
return cssutils.parseString(css_body, validate=validate)
|
|
|
|
|
2015-02-15 19:16:45 +01:00
|
|
|
    def _parse_style_rules(self, css_body, ruleset_index):
        """Returns a list of rules to apply to this doc and a list of rules
        that won't be used because e.g. they are pseudoclasses. Rules
        look like: (specificity, selector, bulk)
        for example: ((0, 1, 0, 0, 0), u'.makeblue', u'color:blue').
        The bulk of the rule should not end in a semicolon.
        """

        def format_css_property(prop):
            # When stripping !important anyway there is no point
            # serializing the priority.
            if self.strip_important or prop.priority != 'important':
                return '{0}:{1}'.format(prop.name, prop.value)
            else:
                return '{0}:{1} !important'.format(prop.name, prop.value)

        def join_css_properties(properties):
            """ Accepts a list of cssutils Property objects and returns
            a semicolon delimitted string like 'color: red; font-size: 12px'
            """
            return ';'.join(
                format_css_property(prop)
                for prop in properties
            )

        leftover = []
        rules = []
        # empty string
        if not css_body:
            return rules, leftover
        sheet = self._parse_css_string(
            css_body,
            validate=not self.disable_validation
        )
        for rule in sheet:
            # handle media rule: media queries can't be inlined, so they
            # are kept verbatim for the leftover <style> tag
            if rule.type == rule.MEDIA_RULE:
                leftover.append(rule)
                continue
            # only proceed for things we recognize
            if rule.type != rule.STYLE_RULE:
                continue

            # normal means it doesn't have "!important"
            normal_properties = [
                prop for prop in rule.style.getProperties()
                if prop.priority != 'important'
            ]
            important_properties = [
                prop for prop in rule.style.getProperties()
                if prop.priority == 'important'
            ]

            # Create three strings that we can use to add to the `rules`
            # list later as ready blocks of css.
            bulk_normal = join_css_properties(normal_properties)
            bulk_important = join_css_properties(important_properties)
            bulk_all = join_css_properties(
                normal_properties + important_properties
            )

            # split comma-grouped selectors into individual ones,
            # skipping empties and at-rules
            selectors = (
                x.strip()
                for x in rule.selectorText.split(',')
                if x.strip() and not x.strip().startswith('@')
            )
            for selector in selectors:
                # pseudoclass selectors (other than the whitelisted
                # filter-type ones) can't be inlined -> leftover
                if (':' in selector and self.exclude_pseudoclasses and
                    ':' + selector.split(':', 1)[1]
                        not in FILTER_PSEUDOSELECTORS):
                    # a pseudoclass
                    leftover.append((selector, bulk_all))
                    continue
                elif '*' in selector and not self.include_star_selectors:
                    continue
                elif selector.startswith(':'):
                    # bare pseudo-selector with no element part
                    continue

                # Crudely calculate specificity
                id_count = selector.count('#')
                class_count = selector.count('.')
                element_count = len(_element_selector_regex.findall(selector))

                # Within one rule individual properties have different
                # priority depending on !important.
                # So we split each rule into two: one that includes all
                # the !important declarations and another that doesn't.
                for is_important, bulk in (
                    (1, bulk_important), (0, bulk_normal)
                ):
                    if not bulk:
                        # don't bother adding empty css rules
                        continue
                    # ordered so that sorting tuples ranks more specific
                    # (and later-seen) rules higher
                    specificity = (
                        is_important,
                        id_count,
                        class_count,
                        element_count,
                        ruleset_index,
                        len(rules)  # this is the rule's index number
                    )
                    rules.append((specificity, selector, bulk))

        return rules, leftover
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2013-08-02 13:54:29 +02:00
|
|
|
    def transform(self, pretty_print=True, **kwargs):
        """change the self.html and return it with CSS turned into style
        attributes.

        If ``self.html`` is an already-parsed lxml tree, the (mutated)
        root is returned; otherwise a serialized document string is
        returned. ``pretty_print`` and any extra keyword arguments are
        forwarded to ``lxml.etree.tostring`` for that serialization.
        """
        if hasattr(self.html, "getroottree"):
            # skip the next bit
            root = self.html.getroottree()
            page = root
            tree = root
        else:
            if self.method == 'xml':
                parser = etree.XMLParser(
                    ns_clean=False,
                    resolve_entities=False
                )
            else:
                parser = etree.HTMLParser()
            stripped = self.html.strip()
            tree = etree.fromstring(stripped, parser).getroottree()
            page = tree.getroot()
            # lxml inserts a doctype if none exists, so only include it in
            # the root if it was in the original html.
            root = tree if stripped.startswith(tree.docinfo.doctype) else page

        assert page is not None

        # no <head> needed if leftover CSS is being discarded
        if self.disable_leftover_css:
            head = None
        else:
            head = get_or_create_head(tree)
        #
        # style selectors
        #

        rules = []
        index = 0

        for element in CSSSelector('style,link[rel~=stylesheet]')(page):
            # If we have a media attribute whose value is anything other than
            # 'all' or 'screen', ignore the ruleset.
            media = element.attrib.get('media')
            if media and media not in ('all', 'screen'):
                continue

            # honor the data-premailer opt-out attribute; any value other
            # than 'ignore' is warned about but the element is still
            # processed
            data_attribute = element.attrib.get(self.attribute_name)
            if data_attribute:
                if data_attribute == 'ignore':
                    del element.attrib[self.attribute_name]
                    continue
                else:
                    warnings.warn(
                        'Unrecognized %s attribute (%r)' % (
                            self.attribute_name,
                            data_attribute,
                        )
                    )

            is_style = element.tag == 'style'
            if is_style:
                css_body = element.text
            else:
                # a <link rel=stylesheet>: fetch the referenced CSS
                href = element.attrib.get('href')
                css_body = self._load_external(href)

            these_rules, these_leftover = self._parse_style_rules(
                css_body, index
            )
            index += 1
            rules.extend(these_rules)
            parent_of_element = element.getparent()
            if these_leftover or self.keep_style_tags:
                if is_style:
                    style = element
                else:
                    # replace the <link> with an inline <style> element
                    style = etree.Element('style')
                    style.attrib['type'] = 'text/css'
                if self.keep_style_tags:
                    style.text = css_body
                else:
                    style.text = self._css_rules_to_string(these_leftover)
                if self.method == 'xml':
                    style.text = etree.CDATA(style.text)

                if not is_style:
                    element.addprevious(style)
                    parent_of_element.remove(element)

            elif not self.keep_style_tags or not is_style:
                parent_of_element.remove(element)

        # external style files
        if self.external_styles:
            for stylefile in self.external_styles:
                css_body = self._load_external(stylefile)
                self._process_css_text(css_body, index, rules, head)
                index += 1

        # css text
        if self.css_text:
            for css_body in self.css_text:
                self._process_css_text(css_body, index, rules, head)
                index += 1

        # rules is a tuple of (specificity, selector, styles), where
        # specificity is a tuple ordered such that more specific
        # rules sort larger.
        rules.sort(key=operator.itemgetter(0))

        # collecting all elements that we need to apply rules on
        # id is unique for the lifetime of the object
        # and lxml should give us the same everytime during this run
        # item id -> {item: item, classes: [], style: []}
        elements = {}
        for _, selector, style in rules:
            new_selector = selector
            class_ = ''
            if ':' in selector:
                new_selector, class_ = re.split(':', selector, 1)
                class_ = ':%s' % class_
            # Keep filter-type selectors untouched.
            if class_ in FILTER_PSEUDOSELECTORS:
                class_ = ''
            else:
                selector = new_selector

            assert selector
            sel = CSSSelector(selector)
            items = sel(page)
            if len(items):
                # same so process it first
                processed_style = csstext_to_pairs(style)

                for item in items:
                    item_id = id(item)
                    if item_id not in elements:
                        elements[item_id] = {
                            'item': item,
                            'classes': [],
                            'style': [],
                        }

                    elements[item_id]['style'].append(processed_style)
                    elements[item_id]['classes'].append(class_)

        # Now apply inline style
        # merge style only once for each element
        # crucial when you have a lot of pseudo/classes
        # and a long list of elements
        for _, element in elements.items():
            final_style = merge_styles(
                element['item'].attrib.get('style', ''),
                element['style'],
                element['classes'],
                remove_unset_properties=self.remove_unset_properties,
            )
            if final_style:
                # final style could be empty string because of
                # remove_unset_properties
                element['item'].attrib['style'] = final_style
            self._style_to_basic_html_attributes(
                element['item'],
                final_style,
                force=True
            )

        if self.remove_classes:
            # now we can delete all 'class' attributes
            for item in page.xpath('//@class'):
                parent = item.getparent()
                del parent.attrib['class']

        # Capitalize Margin properties
        # To fix weird outlook bug
        # https://www.emailonacid.com/blog/article/email-development/outlook.com-does-support-margins
        if self.capitalize_float_margin:
            for item in page.xpath('//@style'):
                mangled = capitalize_float_margin(item)
                item.getparent().attrib['style'] = mangled

        # Add align attributes to images if they have a CSS float value of
        # right or left. Outlook (both on desktop and on the web) are bad at
        # understanding floats, but they do understand the HTML align attrib.
        if self.align_floating_images:
            for item in page.xpath('//img[@style]'):
                image_css = cssutils.parseStyle(item.attrib['style'])
                if image_css.float == 'right':
                    item.attrib['align'] = 'right'
                elif image_css.float == 'left':
                    item.attrib['align'] = 'left'

        #
        # URLs
        #
        if self.base_url:
            if not urlparse(self.base_url).scheme:
                raise ValueError('Base URL must have a scheme')
            for attr in ('href', 'src'):
                for item in page.xpath("//@%s" % attr):
                    parent = item.getparent()
                    url = parent.attrib[attr]
                    # optionally leave fragment links untouched
                    if (
                        attr == 'href' and self.preserve_internal_links and
                        url.startswith('#')
                    ):
                        continue
                    # optionally leave inline attachments untouched
                    if (
                        attr == 'src' and self.preserve_inline_attachments and
                        url.startswith('cid:')
                    ):
                        continue
                    # telephone links never need resolving
                    if attr == 'href' and url.startswith('tel:'):
                        continue
                    parent.attrib[attr] = urljoin(self.base_url, url)

        if hasattr(self.html, "getroottree"):
            # input was a parsed tree; hand back the mutated root
            return root
        else:
            kwargs.setdefault('method', self.method)
            kwargs.setdefault('pretty_print', pretty_print)
            kwargs.setdefault('encoding', 'utf-8')  # As Ken Thompson intended
            out = etree.tostring(root, **kwargs).decode(kwargs['encoding'])
            if self.method == 'xml':
                # wrap CDATA payloads in CSS comments so the output is
                # safe for both XML and CSS parsers
                out = _cdata_regex.sub(
                    lambda m: '/*<![CDATA[*/%s/*]]>*/' % m.group(1),
                    out
                )
            if self.strip_important:
                out = _importants.sub('', out)
            return out
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2014-01-31 00:19:59 +01:00
|
|
|
def _load_external_url(self, url):
|
2016-03-15 19:13:53 +01:00
|
|
|
return requests.get(url).text
|
2014-01-31 00:19:59 +01:00
|
|
|
|
2013-10-14 02:15:37 +02:00
|
|
|
def _load_external(self, url):
|
|
|
|
"""loads an external stylesheet from a remote url or local path
|
|
|
|
"""
|
2014-04-02 05:57:56 +02:00
|
|
|
if url.startswith('//'):
|
|
|
|
# then we have to rely on the base_url
|
|
|
|
if self.base_url and 'https://' in self.base_url:
|
|
|
|
url = 'https:' + url
|
|
|
|
else:
|
|
|
|
url = 'http:' + url
|
|
|
|
|
2013-10-14 02:15:37 +02:00
|
|
|
if url.startswith('http://') or url.startswith('https://'):
|
2014-01-31 00:19:59 +01:00
|
|
|
css_body = self._load_external_url(url)
|
2013-10-14 02:15:37 +02:00
|
|
|
else:
|
|
|
|
stylefile = url
|
|
|
|
if not os.path.isabs(stylefile):
|
2014-02-26 00:05:15 +01:00
|
|
|
stylefile = os.path.abspath(
|
|
|
|
os.path.join(self.base_path or '', stylefile)
|
|
|
|
)
|
2013-10-14 02:15:37 +02:00
|
|
|
if os.path.exists(stylefile):
|
|
|
|
with codecs.open(stylefile, encoding='utf-8') as f:
|
|
|
|
css_body = f.read()
|
2014-01-31 00:19:59 +01:00
|
|
|
elif self.base_url:
|
2014-09-17 06:23:21 +02:00
|
|
|
url = urljoin(self.base_url, url)
|
2014-04-02 05:57:56 +02:00
|
|
|
return self._load_external(url)
|
2013-10-14 02:15:37 +02:00
|
|
|
else:
|
2014-08-22 00:13:03 +02:00
|
|
|
raise ExternalNotFoundError(stylefile)
|
|
|
|
|
2013-10-14 02:15:37 +02:00
|
|
|
return css_body
|
|
|
|
|
2015-09-23 04:45:57 +02:00
|
|
|
@staticmethod
|
|
|
|
def six_color(color_value):
|
|
|
|
"""Fix background colors for Lotus Notes
|
|
|
|
|
|
|
|
Notes which fails to handle three character ``bgcolor`` codes well.
|
|
|
|
see <https://github.com/peterbe/premailer/issues/114>"""
|
|
|
|
|
|
|
|
# Turn the color code from three to six digits
|
|
|
|
retval = _short_color_codes.sub(r'#\1\1\2\2\3\3', color_value)
|
|
|
|
return retval
|
|
|
|
|
2011-10-15 08:36:12 +02:00
|
|
|
def _style_to_basic_html_attributes(self, element, style_content,
|
|
|
|
force=False):
|
2011-10-15 09:50:14 +02:00
|
|
|
"""given an element and styles like
|
2009-08-30 18:24:34 +02:00
|
|
|
'background-color:red; font-family:Arial' turn some of that into HTML
|
|
|
|
attributes. like 'bgcolor', etc.
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2009-08-30 18:24:34 +02:00
|
|
|
Note, the style_content can contain pseudoclasses like:
|
|
|
|
'{color:red; border:1px solid green} :visited{border:1px solid green}'
|
|
|
|
"""
|
2015-02-19 20:24:26 +01:00
|
|
|
if (
|
|
|
|
style_content.count('}') and
|
2015-02-23 18:55:05 +01:00
|
|
|
style_content.count('{') == style_content.count('}')
|
2015-02-19 20:24:26 +01:00
|
|
|
):
|
2009-08-30 18:24:34 +02:00
|
|
|
style_content = style_content.split('}')[0][1:]
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2014-10-01 04:13:01 +02:00
|
|
|
attributes = OrderedDict()
|
2009-08-30 18:24:34 +02:00
|
|
|
for key, value in [x.split(':') for x in style_content.split(';')
|
2011-10-15 09:50:14 +02:00
|
|
|
if len(x.split(':')) == 2]:
|
2009-08-30 18:24:34 +02:00
|
|
|
key = key.strip()
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2009-08-30 18:24:34 +02:00
|
|
|
if key == 'text-align':
|
|
|
|
attributes['align'] = value.strip()
|
2014-09-25 21:37:32 +02:00
|
|
|
elif key == 'vertical-align':
|
|
|
|
attributes['valign'] = value.strip()
|
2016-01-13 19:00:05 +01:00
|
|
|
elif (
|
|
|
|
key == 'background-color' and
|
|
|
|
'transparent' not in value.lower()
|
|
|
|
):
|
2015-09-23 04:45:57 +02:00
|
|
|
# Only add the 'bgcolor' attribute if the value does not
|
|
|
|
# contain the word "transparent"; before we add it possibly
|
|
|
|
# correct the 3-digit color code to its 6-digit equivalent
|
|
|
|
# ("abc" to "aabbcc") so IBM Notes copes.
|
|
|
|
attributes['bgcolor'] = self.six_color(value.strip())
|
2011-08-30 18:11:16 +02:00
|
|
|
elif key == 'width' or key == 'height':
|
2009-08-30 18:24:34 +02:00
|
|
|
value = value.strip()
|
|
|
|
if value.endswith('px'):
|
|
|
|
value = value[:-2]
|
2011-08-30 18:11:16 +02:00
|
|
|
attributes[key] = value
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2009-08-30 18:24:34 +02:00
|
|
|
for key, value in attributes.items():
|
2015-02-19 20:24:26 +01:00
|
|
|
if (
|
|
|
|
key in element.attrib and not force or
|
|
|
|
key in self.disable_basic_attributes
|
|
|
|
):
|
2009-08-30 18:24:34 +02:00
|
|
|
# already set, don't dare to overwrite
|
|
|
|
continue
|
|
|
|
element.attrib[key] = value
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2014-08-08 19:08:58 +02:00
|
|
|
    def _css_rules_to_string(self, rules):
        """given a list of css rules returns a css string

        ``rules`` mixes two shapes: ``(selector, bulk)`` tuples (leftover
        pseudoclass rules from ``_parse_style_rules``) and cssutils
        media-rule objects.
        """
        lines = []
        for item in rules:
            if isinstance(item, tuple):
                k, v = item
                # leftover rules are serialized with every property forced
                # to !important so they win over the inlined styles
                lines.append('%s {%s}' % (k, make_important(v)))
            # media rule
            else:
                for rule in item.cssRules:
                    if isinstance(rule, cssutils.css.csscomment.CSSComment):
                        continue
                    # re-set each property with an explicit !important
                    # priority before serializing the whole media block
                    for key in rule.style.keys():
                        rule.style[key] = (
                            rule.style.getPropertyValue(key, False),
                            '!important'
                        )
                lines.append(item.cssText)
        return '\n'.join(lines)
|
|
|
|
|
2014-10-17 02:16:28 +02:00
|
|
|
def _process_css_text(self, css_text, index, rules, head):
|
2015-02-19 20:24:26 +01:00
|
|
|
"""processes the given css_text by adding rules that can be
|
|
|
|
in-lined to the given rules list and adding any that cannot
|
|
|
|
be in-lined to the given `<head>` element.
|
2014-10-17 02:16:28 +02:00
|
|
|
"""
|
2015-02-15 19:16:45 +01:00
|
|
|
these_rules, these_leftover = self._parse_style_rules(css_text, index)
|
2014-10-17 02:16:28 +02:00
|
|
|
rules.extend(these_rules)
|
2015-05-12 03:14:27 +02:00
|
|
|
if head is not None and (these_leftover or self.keep_style_tags):
|
2014-10-17 02:16:28 +02:00
|
|
|
style = etree.Element('style')
|
|
|
|
style.attrib['type'] = 'text/css'
|
|
|
|
if self.keep_style_tags:
|
|
|
|
style.text = css_text
|
|
|
|
else:
|
|
|
|
style.text = self._css_rules_to_string(these_leftover)
|
|
|
|
head.append(style)
|
2015-02-15 22:42:29 +01:00
|
|
|
|
2015-02-19 20:24:26 +01:00
|
|
|
|
2009-07-15 13:00:27 +02:00
|
|
|
def transform(html, base_url=None, **kwargs):
    """Shortcut: inline the CSS of ``html`` and return the result.

    Any extra keyword arguments are forwarded to the
    :class:`Premailer` constructor, so all of its options are
    reachable from this convenience function (backward compatible:
    existing two-argument calls behave exactly as before).
    """
    return Premailer(html, base_url=base_url, **kwargs).transform()
|
2011-10-15 09:50:14 +02:00
|
|
|
|
2013-11-14 05:36:31 +01:00
|
|
|
|
2014-08-22 00:13:03 +02:00
|
|
|
if __name__ == '__main__':  # pragma: no cover
    # Tiny manual smoke test: inline the CSS of a small document
    # and print the transformed HTML.
    html = """<html>
        <head>
        <title>Test</title>
        <style>
        h1, h2 { color:red; }
        strong {
            text-decoration:none
            }
        p { font-size:2px }
        p.footer { font-size: 1px}
        </style>
        </head>
        <body>
        <h1>Hi!</h1>
        <p><strong>Yes!</strong></p>
        <p class="footer" style="color:red">Feetnuts</p>
        </body>
        </html>"""
    p = Premailer(html)
    print(p.transform())
|