passerelle/passerelle/utils/conversion.py

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2016 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import codecs
import math
import re
import unicodedata
import warnings
from io import BytesIO

import unidecode
from django.utils.encoding import force_text
from PIL import Image


def to_pdf(content):
    """Return *content* as PDF bytes.

    Content that already starts with the ``%PDF`` signature (optionally
    preceded by a UTF-16 or UTF-8 byte order mark) is returned untouched.
    Anything else is opened as an image with PIL and re-saved as a PDF.

    Raises ValueError('invalid image') when PIL cannot open the content and
    ValueError('unsafe image') when PIL emits a DecompressionBombWarning.
    """
    pdf_magic = b'%PDF'
    known_prefixes = (pdf_magic, codecs.BOM + pdf_magic, codecs.BOM_UTF8 + pdf_magic)
    if content.startswith(known_prefixes):
        return content

    try:
        with warnings.catch_warnings():
            # escalate the decompression bomb warning to an exception so
            # that it can be intercepted below
            warnings.simplefilter('error', Image.DecompressionBombWarning)
            picture = Image.open(BytesIO(content))
    except IOError:
        raise ValueError('invalid image')
    except Image.DecompressionBombWarning:
        raise ValueError('unsafe image')

    # PDF cannot handle alpha (RGBA), so everything that is not already RGB
    # is converted first
    rgb_picture = picture if picture.mode == 'RGB' else picture.convert('RGB')
    buffer = BytesIO()
    rgb_picture.save(buffer, format='PDF')
    return buffer.getvalue()


# copied from
# https://stackoverflow.com/questions/10294032/python-replace-typographical-quotes-dashes-etc-with-their-ascii-counterparts
def char_filter(string):
    """Yield the characters of *string*, transliterating the non-letter ones.

    The string is first normalized to NFC. Each character is transliterated
    with unidecode: when the transliteration starts with an ASCII letter the
    original character is yielded unchanged, otherwise the transliteration
    is yielded in its place.
    """
    starts_with_latin = re.compile('[a-zA-Z]+').match
    for original in unicodedata.normalize('NFC', string):
        ascii_form = unidecode.unidecode(original)
        yield original if starts_with_latin(ascii_form) else ascii_form


def clean_string(string):
    """Return *string* rebuilt from the characters produced by char_filter()."""
    pieces = char_filter(string)
    return ''.join(pieces)


def ensure_encoding(s, encoding):
    """Return *s* restricted to what *encoding* can represent.

    The string is first passed through clean_string(), then round-tripped
    through *encoding* with the 'replace' error handler, so characters the
    encoding cannot express are substituted instead of raising.
    """
    cleaned = clean_string(s)
    encoded = cleaned.encode(encoding, 'replace')
    return encoded.decode(encoding)


def to_ascii(s):
    """Transliterate *s* with unidecode and return it as text via force_text."""
    transliterated = unidecode.unidecode(s)
    return force_text(transliterated, 'ascii')


def exception_to_text(e):
    """Render exception *e* as text, whatever its own rendering methods do.

    Three strategies are tried in order, each one only if the previous
    raised:

    1. ``str(e)``;
    2. ``repr(e)``;
    3. a hand-built ``ClassName(args)`` string, where ``args`` is the repr
       of ``e.args`` (empty when there are no args or no ``args``
       attribute, and a placeholder when rendering the args fails too).
    """
    try:
        return str(e)
    except Exception:
        # a broken __str__ must not prevent reporting; fall back to repr()
        pass

    try:
        # repr() already returns text; it used to be passed through
        # str(r, errors='replace'), which always raises TypeError on a str
        # and made this fallback unreachable
        return repr(e)
    except Exception:
        # a broken __repr__ too; build the text by hand below
        pass

    try:
        args = e.args
    except AttributeError:
        content = ''
    else:
        try:
            # e.args is a tuple, so emptiness is tested by truthiness
            # (comparing it to [] was always true)
            content = repr(args) if args else ''
        except Exception:
            content = '<exception-while-rendering-args>'
    return '%s(%s)' % (e.__class__.__name__, content)


def normalize(s):
    """Return *s* reduced to ASCII.

    The string is decomposed with NFKD, then every character that cannot be
    encoded as ASCII (combining marks included) is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', s)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('ascii')


def simplify(s):
    """
    Reduce a string to lower-case ASCII word characters separated by single
    spaces. Used to compare strings on ?q=something requests.

    Falsy input yields an empty string. Otherwise the value is converted to
    text, stripped of non-ASCII characters by normalize(), cleared of
    everything but word characters, whitespace, quotes and dashes, and
    finally runs of whitespace, quotes, underscores and dashes are collapsed
    into one space.
    """
    if not s:
        return ''
    text = force_text(s, 'utf-8', 'ignore')
    ascii_text = normalize(text)
    without_punctuation = re.sub(r'[^\w\s\'-]', '', ascii_text)
    collapsed = re.sub(r'[\s\'_-]+', ' ', without_punctuation)
    trimmed = collapsed.strip()
    return trimmed.lower()


def num2deg(xtile, ytile, zoom):
    """Convert slippy map tile numbers to a (longitude, latitude) tuple in degrees.

    Formula taken from
    http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Python
    """
    tiles_per_axis = 2.0**zoom
    longitude = xtile / tiles_per_axis * 360.0 - 180.0
    mercator_y = math.pi * (1 - 2 * ytile / tiles_per_axis)
    latitude = math.degrees(math.atan(math.sinh(mercator_y)))
    return (longitude, latitude)


def any2bool(value, default=False):
    '''Try to convert anything as a boolean value.

    True and the strings 'true', 'oui' and '1' (case-insensitive, after
    str() conversion) give True; False and 'false', 'non' and '0' give
    False; anything else gives *default*.
    '''
    if value is True:
        return True
    text = str(value).lower()
    if text in ('true', 'oui', '1'):
        return True
    if value is False or text in ('false', 'non', '0'):
        return False
    return default