actesweb: work around characters not encodable in ISO-8859-15 (fixes #30995)

2019-02-28 16:13:20 +01:00 · 2019-02-28 16:13:20 +01:00 · f2e7add34b
parent a7ad6bda4b
commit f2e7add34b
4 changed files with 32 additions and 1 deletions
--- a/debian/control
+++ b/debian/control
@ -29,7 +29,8 @@ Depends: ${python:Depends},
    python-pyproj,
    python-pil,
    python-zeep,
-    python-jsonschema
+    python-jsonschema,
+    python-unidecode
 Recommends: python-soappy, python-phpserialize
 Description: Uniform access to multiple data sources and services (Python module)

--- a/passerelle/apps/actesweb/models.py
+++ b/passerelle/apps/actesweb/models.py
@ -31,6 +31,7 @@ from django.utils.translation import ugettext_lazy as _
 from passerelle.base.models import BaseResource
 from passerelle.utils.api import endpoint
 from passerelle.utils.jsonresponse import APIError
+from passerelle.utils.conversion import ensure_encoding


@contextlib.contextmanager
@ -80,6 +81,9 @@ class ActesWeb(BaseResource):
            else:
                os.makedirs(tmp_dir)

+        # ensure demand_content can be encoded to ISO-8859-15 (Latin-9)
+        demand_content = ensure_encoding(demand_content, 'iso-8859-15')
+
        filename = '%s.DEM' % now().strftime('%Y-%m-%d_%H-%M-%S_%f')
        filepath = os.path.join(self.basepath, filename)
        with named_tempfile(dir=tmp_dir, suffix='.DEM', delete=False) as tpf:
--- a/passerelle/utils/conversion.py
+++ b/passerelle/utils/conversion.py
@ -14,9 +14,12 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

+import re
+import unicodedata
 import warnings
 from StringIO import StringIO

+import unidecode
 from PIL import Image


@ -37,3 +40,25 @@ def to_pdf(content):
    out = StringIO()
    image.save(out, format='PDF')
    return out.getvalue()
+
+
+# copied from
+# https://stackoverflow.com/questions/10294032/python-replace-typographical-quotes-dashes-etc-with-their-ascii-counterparts
+def char_filter(string):
+    '''Keep chars whose transliteration starts with an ASCII letter; replace the others by it'''
+    latin = re.compile('[a-zA-Z]+')
+    for char in unicodedata.normalize('NFC', string):
+        decoded = unidecode.unidecode(char)
+        if latin.match(decoded):
+            yield char
+        else:
+            yield decoded
+
+
+def clean_string(string):
+    return "".join(char_filter(string))
+
+
+def ensure_encoding(s, encoding):
+    s = clean_string(s)
+    return s.encode(encoding, 'replace').decode(encoding)
--- a/setup.py
+++ b/setup.py
@ -107,6 +107,7 @@ setup(name='passerelle',
            'jsonschema',
            'zeep < 3.0',
            'pycrypto',
+            'unidecode',
        ],
        cmdclass={
            'build': build,