288 lines
8.4 KiB
Python
288 lines
8.4 KiB
Python
# encoding: utf-8
|
|
from __future__ import unicode_literals
|
|
import logging
|
|
import mimetypes
|
|
import os
|
|
from os import path
|
|
import errno
|
|
from zipfile import ZipFile
|
|
import email
|
|
|
|
from emails.compat import to_unicode, string_types
|
|
|
|
|
|
class FileNotFound(Exception):
    """Raised by the loaders in this module when a requested file is missing."""
|
|
|
|
|
|
def split_template_path(template):
    """Break a ``/``-separated path into its meaningful segments.

    Empty segments and ``'.'`` segments are dropped.  A segment is rejected
    when it contains the platform path separator (or the alternate
    separator, if the platform defines one) or when it equals the
    parent-directory marker.

    :param template: path that uses ``/`` as its separator.
    :return: list of the remaining segments, in their original order.
    :raises FileNotFound: if any segment is rejected by the check above.
    """
    segments = []
    for segment in template.split('/'):
        has_separator = path.sep in segment or bool(
            path.altsep and path.altsep in segment)
        if has_separator or segment == path.pardir:
            raise FileNotFound(template)
        if segment and segment != '.':
            segments.append(segment)
    return segments
|
|
|
|
|
|
def open_if_exists(filename, mode='rb'):
    """Open *filename* and return the file object, or ``None`` if it is absent.

    "Absent" covers three situations, all of which mean that there is no
    regular file to read at that location:

    * the path does not exist (``ENOENT``);
    * the path is a directory (``EISDIR``);
    * a parent component of the path is not a directory (``ENOTDIR``).

    :param filename: path of the file to open.
    :param mode: mode passed to :func:`open`; binary read by default.
    :return: the open file object (the caller must close it) or ``None``.
    :raises IOError: for any other failure, e.g. a permission error.
    """
    try:
        return open(filename, mode)
    except IOError as e:
        if e.errno not in (errno.ENOENT, errno.EISDIR, errno.ENOTDIR):
            raise
    return None
|
|
|
|
|
|
class BaseLoader(object):
    """Shared behaviour for the loaders in this module.

    The methods below rely on two methods that this class does not define
    and that subclasses are expected to supply:

    * ``get_source(name)`` returning a ``(contents, name)`` pair;
    * ``list_files()`` returning an iterable of file names.
    """

    def __getitem__(self, filename):
        """Return the contents of *filename*, or ``None`` if it is not found.

        :param filename: name passed through to ``get_source``.
        :return: the first element of the pair returned by ``get_source``,
            or ``None`` when ``get_source`` raises :class:`FileNotFound`.
        """
        try:
            contents, _ = self.get_source(filename)
        except FileNotFound:
            return None
        return contents

    def find_index_file(self, filename=None):
        """Return the name of the file to use as the HTML index.

        When *filename* is given it is only checked for existence.
        Otherwise the names from ``list_files()`` are scanned: the first
        ``.htm``/``.html`` file whose base name starts with ``index.``
        (case-insensitive) wins; failing that, the first ``.htm``/``.html``
        file encountered is returned.

        :param filename: optional explicit index file name.
        :return: the chosen file name.
        :raises FileNotFound: if *filename* is given but cannot be loaded,
            or if no HTML file is listed at all.
        """
        if filename:
            # Compare with None instead of relying on truthiness, so that an
            # existing but empty file is not mistaken for a missing one.
            if self[filename] is None:
                raise FileNotFound(filename)
            return filename

        first_html = None
        for candidate in self.list_files():
            basename = path.basename(candidate).lower()
            if not basename.endswith(('.htm', '.html')):
                continue
            if basename.startswith('index.'):
                return candidate
            if first_html is None:
                # Remember the fallback but keep looking for an index.* file.
                first_html = candidate

        if first_html is not None:
            return first_html

        raise FileNotFound('index html')
|
|
|
|
|
|
# FileSystemLoader from jinja2.loaders
|
|
|
|
class FileSystemLoader(BaseLoader):
    """Loads templates from the file system.  This loader can find templates
    in folders on the file system and is the preferred way to load them.

    The loader takes the path to the templates as string, or if multiple
    locations are wanted a list of them which is then looked up in the
    given order:

    >>> loader = FileSystemLoader('/path/to/templates')
    >>> loader = FileSystemLoader(['/path/to/templates', '/other/path'])

    Per default the template encoding is ``'utf-8'`` which can be changed
    by setting the `encoding` parameter to something else.

    If `base_path` is set, it is prepended to every name passed to
    :meth:`get_source` before the name is looked up in the search paths.
    """

    def __init__(self, searchpath, encoding='utf-8', base_path=None):
        """
        :param searchpath: a single directory or an iterable of directories.
        :param encoding: encoding used to decode the files that are read.
        :param base_path: optional ``/``-separated prefix for looked-up names.
        """
        if isinstance(searchpath, string_types):
            searchpath = [searchpath]
        self.searchpath = list(searchpath)
        self.encoding = encoding
        self.base_path = base_path

    def get_source(self, template):
        """Read *template* from the first search path that contains it.

        :param template: ``/``-separated name, relative to `base_path`
            when that is set, otherwise relative to the search paths.
        :return: ``(contents, filename)`` where *contents* is the decoded
            text and *filename* is the path of the file that was read.
        :raises FileNotFound: if the name fails the sanity check of
            ``split_template_path`` or is found in no search path.
        """
        # Names handled here always use '/', whatever the platform, so the
        # prefix is joined with posixpath rather than os.path.
        import posixpath

        if self.base_path:
            # The joined name used to be stored in an unused variable, so
            # base_path had no effect on the lookup.
            template = posixpath.join(self.base_path, template)

        pieces = split_template_path(template)
        for searchpath in self.searchpath:
            filename = path.join(searchpath, *pieces)
            f = open_if_exists(filename)
            if f is None:
                continue
            try:
                contents = f.read().decode(self.encoding)
            finally:
                f.close()
            return contents, filename

        raise FileNotFound(template)

    def list_files(self):
        """Yield every file below the search paths, once per distinct name.

        Names are relative to the search path they were found in and use
        ``/`` as separator; `base_path` is not taken into account.  A name
        found in several search paths is yielded only for the first one.
        """
        found = set()
        for searchpath in self.searchpath:
            for dirpath, _dirnames, filenames in os.walk(searchpath):
                for filename in filenames:
                    full_name = path.join(dirpath, filename)
                    # Cut the search path prefix off and normalise to '/'.
                    template = full_name[len(searchpath):] \
                        .strip(path.sep) \
                        .replace(path.sep, '/')
                    if template[:2] == './':
                        template = template[2:]
                    if template not in found:
                        # Record the name; without this the check above
                        # could never filter out a duplicate.
                        found.add(template)
                        yield template
|
|
|
|
|
|
class ZipLoader(BaseLoader):
    """Loads files from a zip archive.

    Archive members are addressed by their decoded names (see
    :meth:`_decode_zip_filename`).  Contents are returned exactly as
    ``ZipFile.read`` delivers them; `encoding` is stored on the instance
    but is not used by the methods of this class.
    """

    def __init__(self, file, encoding='utf-8', base_path=None):
        """
        :param file: whatever ``zipfile.ZipFile`` accepts as its first
            argument; the archive is opened for reading.
        :param encoding: stored as ``self.encoding``.
        :param base_path: optional prefix joined in front of every name
            passed to :meth:`get_source`.
        """
        self.zipfile = ZipFile(file, 'r')
        self.encoding = encoding
        self.base_path = base_path
        # Cache of member contents, keyed by decoded name.
        self.mapping = {}
        # Decoded name -> name as stored in the archive; built lazily.
        self._filenames = None

    def _decode_zip_filename(self, name):
        """Return *name* converted with the first encoding that works.

        Falls back to returning *name* unchanged if every attempt raises
        ``UnicodeDecodeError``.
        """
        # NOTE(review): cp866 is a single-byte code page; if it accepts every
        # byte value the later candidates are never reached -- confirm the
        # order is intended.
        for enc in ('cp866', 'cp1251', 'utf-8'):
            try:
                return to_unicode(name, enc)
            except UnicodeDecodeError:
                pass
        return name

    def _unpack_zip(self):
        """Build the decoded-name index on first use; later calls are no-ops."""
        if self._filenames is None:
            self._filenames = {}
            for name in self.zipfile.namelist():
                decoded_name = self._decode_zip_filename(name)
                self._filenames[decoded_name] = name

    def get_source(self, name):
        """Return the contents of the archive member called *name*.

        :param name: decoded member name, relative to `base_path` when
            that is set.
        :return: ``(data, name)`` where *name* is the full name that was
            looked up (including `base_path`).
        :raises FileNotFound: if the archive has no such member.
        """
        # Member names inside a zip archive use '/', so the prefix must not
        # be joined with the platform separator (a backslash on Windows).
        import posixpath

        if self.base_path:
            name = posixpath.join(self.base_path, name)

        self._unpack_zip()

        if isinstance(name, str):
            name = to_unicode(name, 'utf-8')

        data = self.mapping.get(name)
        if data is not None:
            return data, name

        original_name = self._filenames.get(name)
        if original_name is None:
            raise FileNotFound(name)

        data = self.zipfile.read(original_name)
        # Remember the contents: the cache was consulted above but never
        # filled, so every lookup used to decompress the member again.
        self.mapping[name] = data
        return data, name

    def list_files(self):
        """Return the decoded names of all archive members, sorted."""
        self._unpack_zip()
        return sorted(self._filenames)
|
|
|
|
|
|
class MsgLoader(BaseLoader):
    """
    Load files from email.Message

    Each non-multipart part of the message becomes one entry in
    ``self._files``: the ``text/html`` part is stored as ``__index.html``,
    the ``text/plain`` part as ``__index.txt``, and every other part under
    its Content-ID, its file name, or a generated ``part-NNN.ext`` name.

    Thanks to
    http://blog.magiksys.net/parsing-email-using-python-content
    """

    # Encodings tried, in this order, when the declared charset is missing
    # or does not work.
    common_charsets = ['ascii', 'utf-8', 'utf-16', 'windows-1252', 'cp850', 'windows-1251']

    def __init__(self, msg, base_path=None):
        """
        :param msg: a message object, or the message source as a string,
            which is parsed with ``email.message_from_string``.
        :param base_path: stored as ``self.base_path``; not used by the
            methods of this class.
        """
        if isinstance(msg, string_types):
            self.msg = email.message_from_string(msg)
        else:
            self.msg = msg
        self.base_path = base_path
        self._html_files = []
        self._text_files = []
        self._files = {}
        # Set once the message has been walked, so it is parsed only once.
        self._parsed = False
        # Number used for the next generated ``part-NNN`` file name.
        self._part_counter = 1

    def decode_text(self, text, charset=None):
        """Decode *text*, trying *charset* first and then `common_charsets`.

        :param text: the encoded payload.
        :param charset: charset declared for the payload, if any.
        :return: ``(decoded_text, charset_used)``, or ``(text, None)`` with
            the input unchanged when no candidate succeeds.
        """
        candidates = ([charset] if charset else []) + list(self.common_charsets)
        for candidate in candidates:
            try:
                return text.decode(candidate), candidate
            except (UnicodeError, LookupError):
                # LookupError: the charset name is unknown to Python; treat
                # it like a failed decode and move on to the next candidate.
                continue
        return text, None

    def clean_content_id(self, content_id):
        """Strip one leading ``<`` and one trailing ``>`` from *content_id*."""
        if content_id.startswith('<'):
            content_id = content_id[1:]
        if content_id.endswith('>'):
            content_id = content_id[:-1]
        return content_id

    def extract_part_text(self, part):
        """Return the decoded payload of *part* as text (see `decode_text`)."""
        return self.decode_text(part.get_payload(decode=True), charset=part.get_param('charset'))[0]

    def add_html_part(self, part):
        """Store *part* as the HTML body under the name ``__index.html``."""
        name = '__index.html'
        self._files[name] = {'data': self.extract_part_text(part),
                             'filename': name,
                             'content_type': part.get_content_type()}

    def add_text_part(self, part):
        """Store *part* as the plain-text body under the name ``__index.txt``."""
        name = '__index.txt'
        self._files[name] = {'data': self.extract_part_text(part),
                             'filename': name,
                             'content_type': part.get_content_type()}

    def add_another_part(self, part):
        """Store any other *part* with its payload left undecoded as text.

        The entry is named after the part's Content-ID (and flagged
        ``inline``) when it has one, else after its file name, else after
        a generated ``part-NNN.ext`` name.
        """
        f = {}
        content_id = part['Content-ID']
        if content_id:
            f['filename'] = self.clean_content_id(content_id)
            f['inline'] = True
        else:
            filename = part.get_filename()
            if not filename:
                ext = mimetypes.guess_extension(part.get_content_type())
                if not ext:
                    # Use a generic bag-of-bits extension
                    ext = '.bin'
                # The counter lives on the instance: a local counter restarts
                # at 1 on every call, so unnamed parts with the same
                # extension would overwrite each other.
                filename = 'part-%03d%s' % (self._part_counter, ext)
                self._part_counter += 1
            f['filename'] = filename
        f['content_type'] = part.get_content_type()
        f['data'] = part.get_payload(decode=True)
        self._files[f['filename']] = f

    def _parse_msg(self):
        """Walk the message once and fill ``self._files``."""
        if self._parsed:
            return

        # Start numbering afresh so generated names are reproducible even
        # if an earlier parse attempt was interrupted by an exception.
        self._part_counter = 1

        for part in self.msg.walk():
            content_type = part.get_content_type()

            if content_type.startswith('multipart/'):
                # Containers carry no payload of their own.
                continue

            if content_type == 'text/html':
                self.add_html_part(part)
                continue

            if content_type == 'text/plain':
                self.add_text_part(part)
                continue

            self.add_another_part(part)

        self._parsed = True

    def get_source(self, name):
        """Return ``(data, name)`` for the stored file called *name*.

        Unlike the other loaders in this module this method does not raise
        for an unknown name; it returns ``(None, name)`` instead.
        """
        self._parse_msg()
        f = self._files.get(name)
        if f is not None:
            return f['data'], name
        return None, name

    def list_files(self):
        """Return the mapping of stored files, keyed by file name.

        The message is parsed first, so the mapping is populated even when
        ``get_source`` has not been called yet.
        """
        self._parse_msg()
        return self._files