actesweb: work around characters not encodable in ISO-8859-15 (fixes #30995)

This commit is contained in:
Benjamin Dauvergne 2019-02-28 16:13:20 +01:00
parent a7ad6bda4b
commit f2e7add34b
4 changed files with 32 additions and 1 deletions

3
debian/control vendored
View File

@ -29,7 +29,8 @@ Depends: ${python:Depends},
python-pyproj,
python-pil,
python-zeep,
python-jsonschema
python-jsonschema,
python-unidecode
Recommends: python-soappy, python-phpserialize
Description: Uniform access to multiple data sources and services (Python module)

View File

@ -31,6 +31,7 @@ from django.utils.translation import ugettext_lazy as _
from passerelle.base.models import BaseResource
from passerelle.utils.api import endpoint
from passerelle.utils.jsonresponse import APIError
from passerelle.utils.conversion import ensure_encoding
@contextlib.contextmanager
@ -80,6 +81,9 @@ class ActesWeb(BaseResource):
else:
os.makedirs(tmp_dir)
# ensure demand_content can be encoded to ISO-8859-15 (Latin-9)
demand_content = ensure_encoding(demand_content, 'iso-8859-15')
filename = '%s.DEM' % now().strftime('%Y-%m-%d_%H-%M-%S_%f')
filepath = os.path.join(self.basepath, filename)
with named_tempfile(dir=tmp_dir, suffix='.DEM', delete=False) as tpf:

View File

@ -14,9 +14,12 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import unicodedata
import warnings
from StringIO import StringIO
import unidecode
from PIL import Image
@ -37,3 +40,25 @@ def to_pdf(content):
out = StringIO()
image.save(out, format='PDF')
return out.getvalue()
# copied from
# https://stackoverflow.com/questions/10294032/python-replace-typographical-quotes-dashes-etc-with-their-ascii-counterparts
def char_filter(string):
    '''Yield the characters of string, with an ASCII fallback for non-letters.

    The input is first normalized to NFC. Each character is then passed
    through unidecode: when the transliteration begins with an ASCII letter
    the original character is yielded unchanged, otherwise the
    transliteration itself is yielded in its place.
    '''
    ascii_letters = re.compile('[a-zA-Z]+')
    normalized = unicodedata.normalize('NFC', string)
    for original in normalized:
        transliterated = unidecode.unidecode(original)
        # letter-like characters are kept as they are; anything else is
        # swapped for whatever unidecode produced for it
        yield original if ascii_letters.match(transliterated) else transliterated
def clean_string(string):
    '''Return string rebuilt from the pieces yielded by char_filter.'''
    pieces = char_filter(string)
    return "".join(pieces)
def ensure_encoding(s, encoding):
    '''Return a version of s that can be encoded with the given encoding.

    The string is first passed through clean_string, then round-tripped
    through the codec with the 'replace' error handler, so any character
    the codec still cannot represent is substituted by the codec's
    replacement marker.
    '''
    cleaned = clean_string(s)
    encoded = cleaned.encode(encoding, 'replace')
    return encoded.decode(encoding)

View File

@ -107,6 +107,7 @@ setup(name='passerelle',
'jsonschema',
'zeep < 3.0',
'pycrypto',
'unidecode',
],
cmdclass={
'build': build,