passerelle-atreal-openads/atreal_openads/models.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2018 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import base64
import urlparse
import datetime
import os
import re
import magic
import hashlib
import copy

from HTMLParser import HTMLParser

from django.db import models
from django.http import Http404
from django.utils.translation import ugettext_lazy as _
from django.core.files import File
from django.core.files.base import ContentFile

from passerelle.base.models import BaseResource
from passerelle.utils.api import endpoint
from passerelle.utils.jsonresponse import APIError


class MLStripper(HTMLParser):
    """HTML parser that removes html tags."""
    def __init__(self):
        self.reset()
        self.fed = []
    def handle_data(self, d):
        self.fed.append(d)
    def get_data(self):
        return ''.join(self.fed)


def strip_tags(html):
    """Remove html tags from a string."""
    s = MLStripper()
    s.feed(html)
    return s.get_data()


def clean_spaces(text):
    """Remove extra spaces an line breaks from a string."""
    text = text.replace('\n', ' ')
    text = text.replace('\r', ' ')
    text = text.replace('\t', ' ')
    text = text.replace('\\n', ' ')
    text = text.replace('\\r', ' ')
    text = text.replace('\\t', ' ')
    return re.sub(r'  +', ' ', text).strip()


def normalize(value):
    """Normalize a value to be send to openADS.API."""
    if value is None:
        return ''
    return clean_spaces(str(value))


def get_file_data(path, b64=True):
    """Return the content of a file as a string, in base64 if specified."""
    with open(path, 'r') as f:
        if b64:
            return base64.b64encode(f.read())
        return f.read()


def get_upload_path(instance, filename):
    """Return a relative upload path for a file."""
    # be careful:
    #   * openADS accept only filename less than 50 chars
    #   * name should be unique, even if the content is the same
    return 'pass_openADS_up_%s_%s' % (
        datetime.datetime.now().strftime('%Y-%b-%d_%Hh%Mm%Ss%f'),
        instance.file_hash[:4]
    )


class ForwardFile(models.Model):
    """Represent a file uploaded by a user, to be forwarded to openADS.API."""
    numero_demande = models.CharField(max_length=20)
    numero_dossier = models.CharField(max_length=20)
    type_fichier   = models.CharField(max_length=10)
    file_hash      = models.CharField(max_length=100, default='', blank=True)
    orig_filename  = models.CharField(max_length=100, default='', blank=True)
    content_type   = models.CharField(max_length=100, default='', blank=True)
    upload_file    = models.FileField(upload_to=get_upload_path, null=True)
    upload_status  = models.CharField(max_length=10, default='', blank=True)
    upload_msg     = models.CharField(max_length=255, default='', blank=True)
    last_update_datetime = models.DateTimeField(auto_now=True)


class AtrealOpenads(BaseResource):
    """API that proxy/relay communications with/to openADS."""

    collectivite = models.CharField(_('Collectivity (identifier)'), max_length=255,
            help_text=_('ex: Marseille, or ex: 3'), default='', blank=True)
    openADS_API_login = models.CharField(_('openADS API login'), max_length=255,
            help_text=_('ex: user1234'), default='')
    openADS_API_password = models.CharField(_('openADS API password'), max_length=255,
            help_text=_('ex: ah9pGbKKHv5ToF3cPQuV'), default='')
    openADS_API_url = models.URLField(_('openADS API URL'), max_length=255,
            help_text=_('ex: https://openads.your_domain.net/api/'), default='')
    extra_debug = models.BooleanField(_('Extra debug'),
            help_text=_('ex: True'), default=0)

    openADS_API_timeout = 3600

    category = _('Business Process Connectors')

    api_description = _('''
        This API provides exchanges with openADS.
        ''')

    class Meta:
        verbose_name = _('openADS')


    @property
    def extra_debug_enabled(self):
        """Return True if 'extra debug' is enabled."""
        return bool(self.extra_debug)


    def debug(self, *args, **kwargs):
        """Log a debug message only if 'extra debug' is enabled."""
        if self.extra_debug_enabled:
            self.logger.debug(*args, **kwargs)


    def check_status(self):
        """Check avaibility of the openADS.API service."""
        url = urlparse.urljoin(self.openADS_API_url, '__api__')
        response = self.requests.get(url, auth=(self.openADS_API_login, self.openADS_API_password))
        response.raise_for_status()
        return {'response': response.status_code}


    @endpoint(description="Test an openADS 'connexion'")
    def test_openads_connexion(self, request):
        return self.check_status()


    @endpoint(
        description="Create an openADS 'dossier'",
        methods=['post'],
        pattern='^(?P<type_dossier>\w+)/?$',
        example_pattern='{type_dossier}/',
        parameters={
             'type_dossier': {'description': _("Type of 'dossier'"), 'example_value': 'DIA'}
        })
    def create_dossier(self, request, type_dossier, *args, **kwargs):
        self.debug("----- request json (begining) -----")
        json_data = json.loads(request.body)
        debug_json = copy.deepcopy(json_data)
        file_keys = ['cerfa'] + ['plan_cadastral_%s' % i for i in range(1,5)]
        for k in file_keys:
            if k in debug_json['fields'] \
            and debug_json['fields'][k] \
            and isinstance(debug_json['fields'][k], dict) \
            and 'content' in debug_json['fields'][k]:
                debug_json['fields'][k]['content'] = '<b64 content>'
        self.debug(json.dumps(debug_json))
        self.debug("----- request json (end) -----")

        payload = { "collectivite": self.collectivite }

        payload["terrain"] = {
            "numero_voie": normalize(json_data['fields']['terrain_numero_voie']),
            "nom_voie"   : normalize(json_data['fields']['terrain_nom_voie']),
            "code_postal": normalize(json_data['fields']['terrain_code_postal']),
            "localite"   : normalize(json_data['fields']['terrain_localite']),
            "references_cadastrales": []
        }
        if 'terrain_lieu_dit' in json_data['fields'] and json_data['fields']['terrain_lieu_dit']:
            payload["terrain"]["lieu_dit"] = normalize(json_data['fields']['terrain_lieu_dit'])

        for ref in json_data['fields']['reference_cadastrale']:
            payload["terrain"]["references_cadastrales"].append({
                "prefixe": normalize(ref[0]),
                "section": normalize(ref[1]),
                "numero" : normalize(ref[2])
            })
        if json_data['fields']['autres_parcelles']:
            for ref in json_data['fields']['references_cadastrales']:
                payload["terrain"]["references_cadastrales"].append({
                    "prefixe": normalize(ref[0]),
                    "section": normalize(ref[1]),
                    "numero" : normalize(ref[2])
                })

        prefix = ''
        if json_data['fields']['proprietaire'] != 'Oui':
            prefix = 'mandataire_'

        demandeur = {
            "type_personne": 'particulier' if normalize(json_data['fields']['proprietaire_qualite']) == 'Un particulier' else 'personne morale',
            "typologie"    : 'petitionnaire' if normalize(json_data['fields']['proprietaire']) == 'Oui' else 'mandataire',
            "nom"          : normalize(json_data['fields']['%snom'    % prefix]),
            "prenom"       : normalize(json_data['fields']['%sprenom' % prefix]),
            "adresse": {
                "numero_voie": normalize(json_data['fields']['%snumero_voie' % prefix]),
                "nom_voie"   : normalize(json_data['fields']['%snom_voie'    % prefix]),
                "code_postal": normalize(json_data['fields']['%scode_postal' % prefix]),
                "localite"   : normalize(json_data['fields']['%slocalite'    % prefix])
            }
        }
        if '%slieu_dit' % prefix in json_data['fields'] and json_data['fields']['%slieu_dit' % prefix]:
            demandeur["adresse"]["lieu_dit"] = normalize(json_data['fields']['%slieu_dit' % prefix])

        payload["demandeurs"] = [demandeur]

        self.debug("----- payload (begining) -----")
        self.debug(json.dumps(payload))
        self.debug("----- payload (end) -----")

        files = []
        file_keys = ['cerfa'] + ['plan_cadastral_%s' % i for i in range(1,5)]
        for k in file_keys:
            if k in json_data['fields'] \
            and json_data['fields'][k] \
            and isinstance(json_data['fields'][k], dict) \
            and 'content' in json_data['fields'][k]:
                content      = base64.b64decode(json_data['fields'][k]['content'])
                content_type = magic.from_buffer(content, mime=True)
                upload_file  = ContentFile(content)
                file_hash    = self.file_digest(upload_file)
                filename     = file_hash + '.pdf'
                if 'content_type' in json_data['fields'][k]:
                    content_type = json_data['fields'][k]['content_type']
                if k == 'cerfa' and content_type != 'application/pdf':
                    self.logger.warning("CERFA content type is '%s' instead of '%s'", content_type, 'application/pdf')
                if 'filename' in json_data['fields'][k]:
                    filename = json_data['fields'][k]['filename']
                files.append({
                    'type_fichier' : 'CERFA' if k == 'cerfa' else 'plan',
                    'orig_filename': filename,
                    'content_type' : content_type,
                    'file_hash'    : file_hash,
                    'upload_file'  : upload_file
                })

        self.debug("----- files (begining) -----")
        self.debug(files)
        self.debug("----- files (end) -----")

        url = urlparse.urljoin(self.openADS_API_url, '/dossiers/%s' % type_dossier)
        response = self.requests.post(
            url,
            json=payload,
            auth=(self.openADS_API_login, self.openADS_API_password),
            timeout=self.openADS_API_timeout
        )
        if response.status_code // 100 != 2:
            error = self.get_response_error(response)
            self.logger.warning("Request [POST] '%s' failed with error: '%s'", url, error)
            raise APIError(error)
        try:
            result = response.json()
        except ValueError:
            raise APIError('No JSON content returned: %r' % response.content[:1000])

        self.debug("----- response json (begining) -----")
        debug_json = copy.deepcopy(result)
        if 'files' in debug_json \
        and debug_json['files'] \
        and isinstance(debug_json['files'], list) \
        and len(debug_json['files']) > 0 \
        and isinstance(debug_json['files'][0], dict) \
        and 'b64_content' in debug_json['files'][0]:
            debug_json['files'][0]['b64_content'] = '<b64 content>'
        self.debug(json.dumps(debug_json))
        self.debug("----- response json (end) -----")

        numero_dossier = result.get('numero_dossier')
        self.debug("Numéro dossier: %s", str(numero_dossier))

        recepisse = result['files'][0]
        try:
            recepisse_content = base64.b64decode(recepisse['b64_content'])
        except TypeError:
            raise APIError('Invalid content for recepisse')
        self.debug("Successfully decoded recepisse from base64")

        if recepisse['content_type'] and recepisse['content_type'] != 'application/pdf':
            self.debug(
                "Forcing 'recepisse' content type to '%s' instead of '%s'.",
                'application/pdf',
                recepisse['content_type']
            )
            recepisse['content_type'] = 'application/pdf'

        if files:
            file_ids = []
            for f in files:
                rand_id  = base64.urlsafe_b64encode(os.urandom(6))
                FF = ForwardFile()
                FF.numero_demande = rand_id
                FF.numero_dossier = numero_dossier
                for k in ['type_fichier', 'orig_filename', 'content_type', 'file_hash']:
                    setattr(FF, k, f[k])
                FF.upload_file.save(FF.orig_filename, f['upload_file'])
                FF.upload_status  = 'pending'
                FF.save()
                self.debug(
                    "Created ForwardFile '%s' for file '%s' (%s)",
                    FF.id,
                    FF.orig_filename,
                    FF.upload_file.path
                )
                file_ids.append(FF.id)

            job = self.add_job('upload_user_files',
                         natural_id=numero_dossier,
                         type_dossier=type_dossier,
                         numero_dossier=numero_dossier,
                         file_ids=file_ids)
            self.debug(
                "Added a job '%s' for dossier '%s' (%s) with file ids '%s'",
                job.id,
                numero_dossier,
                type_dossier,
                ','.join([str(fid) for fid in file_ids])
            )

        return {
            'numero_dossier': numero_dossier,
            'recepisse'     : recepisse
        }


    @endpoint(
        description="Get informations about an openADS 'dossier'",
        pattern='^(?P<type_dossier>\w+)/?$',
        example_pattern='{type_dossier}/',
        parameters={
             'type_dossier'  : {'description': _("Type of 'dossier'")       , 'example_value': 'DIA'},
             'numero_dossier': {'description': _("Identifier for 'dossier'"), 'example_value': 'DIA0130551900001'}
        })
    def get_dossier(self, request, type_dossier, numero_dossier, *args, **kwargs):
        url = urlparse.urljoin(self.openADS_API_url, '/dossier/%s/%s' % (type_dossier, numero_dossier))
        response = self.requests.get(url, auth=(self.openADS_API_login, self.openADS_API_password))
        if response.status_code // 100 != 2:
            error = self.get_response_error(response)
            self.logger.warning("Request [GET] '%s' failed with error: '%s'", url, error)
            raise APIError(error)
        try:
            result = response.json()
        except ValueError:
            raise APIError('No JSON content returned: %r' % response.content[:1000])
        etat          = result.get('etat')
        date_depot    = result.get('date_depot')
        date_decision = result.get('date_decision')
        decision      = result.get('decision')
        date_limite_instruction = result.get('date_limite_instruction')
        return response.json()


    def upload2ForwardFile(self, path, numero_dossier, type_fichier='CERFA'):
        """Convert a file path to a ForwardFile."""
        if path:
            rand_id  = base64.urlsafe_b64encode(os.urandom(6))
            fwd_file = ForwardFile()
            fwd_file.numero_demande  = rand_id
            fwd_file.numero_dossier  = numero_dossier
            fwd_file.type_fichier    = type_fichier
            fwd_file.orig_filename   = os.path.basename(path)
            fwd_file.content_type    = magic.from_file(path, mime=True)
            with open(path, 'r') as fp:
                fwd_file.file_hash   = self.file_digest(fp)
            fwd_file.upload_file     = File(open(path, 'r'))
            fwd_file.upload_status   = 'pending'
            return fwd_file
        return None


    @endpoint(
        description="Get informations about the forwarding of a user file to openADS",
        methods=['get'],
        parameters={
             'numero_dossier': {'description': _("Identifier for 'dossier'"), 'example_value': 'DIA0130551900001'},
             'fichier_id'    : {'description': _("File identifier")         , 'example_value': '78'},
             'summary'       : {'description': _("Summary (only)")          , 'example_value': '1'}
        })
    def get_fwd_files_status(self, request, numero_dossier, fichier_id=None, summary=None, *args, **kwargs):
        payload = []
        fwd_files = []

        if not fichier_id:
            try:
                fwd_files = ForwardFile.objects.filter(numero_dossier=numero_dossier)
            except ForwardFile.DoesNotExist:
                raise Http404("No file matches 'numero_dossier=%s'." % numero_dossier)
        elif fichier_id:
            try:
                fwd_file = ForwardFile.objects.get(id=fichier_id)
            except ForwardFile.DoesNotExist:
                raise Http404("No file matches 'numero_dossier=%s' and 'id=%s'." % (numero_dossier, fichier_id))
            if fwd_file:
                fwd_files.append(fwd_file)

        if fwd_files:

            summary_enabled = summary and len(summary) and summary == '1'
            if summary_enabled:
                summary_data = {
                    'all_forwarded': True,
                    'pending'   : [],
                    'uploading' : [],
                    'success'   : [],
                    'failed'    : []
                }

            for fwd_file in fwd_files:
                b64content = None
                if fwd_file.upload_file:
                    b64content = base64.b64encode(fwd_file.upload_file.read())
                payload.append({
                    'id'            : fwd_file.id,
                    'numero_demande': fwd_file.numero_demande,
                    'numero_dossier': fwd_file.numero_dossier,
                    'type_fichier'  : fwd_file.type_fichier,
                    'file_hash'     : fwd_file.file_hash,
                    'orig_filename' : fwd_file.orig_filename,
                    'content_type'  : fwd_file.content_type,
                    'upload_status' : fwd_file.upload_status,
                    'upload_msg'    : fwd_file.upload_msg,
                    'b64_content'   : b64content,
                    'last_update_datetime' : fwd_file.last_update_datetime
                })

                if summary_enabled:
                    status_msg = '[%s] %s => %s' % (fwd_file.id, fwd_file.orig_filename, fwd_file.upload_msg)
                    summary_data[fwd_file.upload_status].append(status_msg)
                    if fwd_file.upload_status != 'success':
                        summary_data['all_forwarded'] = False

            if summary_enabled:
                payload = summary_data

        return payload


    @endpoint(
        description="Get a 'courrier' from an openADS 'dossier'",
        pattern='^(?P<type_dossier>\w+)/?$',
        example_pattern='{type_dossier}/',
        parameters={
             'type_dossier'  : {'description': _("Type of 'dossier'")       , 'example_value': 'DIA'},
             'numero_dossier': {'description': _("Identifier for 'dossier'"), 'example_value': 'DIA0130551900001'}
        })
    def get_courrier(self, request, type_dossier, numero_dossier, *args, **kwargs):
        url = urlparse.urljoin(
            self.openADS_API_url,
            '/dossier/%s/%s/courrier/%s' % (type_dossier, numero_dossier, 'dia_renonciation_preempter'))
        response = self.requests.get(url, auth=(self.openADS_API_login, self.openADS_API_password))
        if response.status_code // 100 != 2:
            error = self.get_response_error(response)
            self.logger.warning("Request [GET] '%s' failed with error: '%s'", url, error)
            raise APIError(error)
        try:
            result = response.json()
        except ValueError:
            raise APIError('No JSON content returned: %r' % response.content[:1000])
        courrier = result['files'][0]
        try:
            courrier_content = base64.b64decode(courrier['b64_content'])
        except TypeError:
            raise APIError('Invalid content for courrier')
        return {'courrier': courrier}


    def get_response_error(self, response):
        """Return a error string from an HTTP response."""
        try:
            result = response.json()
            errors = result.get('errors')
            msg = []
            if errors:
                for error in errors:
                    location = error.get('location')
                    name = error.get('name')
                    desc = error.get('description')
                    msg.append('[%s] (%s) %s' % (location, normalize(name), normalize(desc)))
            if msg:
                return "HTTP error: %s, %s" % (response.status_code, ','.join(msg))
        except ValueError:
            pass
        return "HTTP error: %s, %s" % \
            (response.status_code,
            clean_spaces(strip_tags(response.content[:1000])) if response.content else '')


    def upload_user_files(self, type_dossier, numero_dossier, file_ids):
        """A Job to forward user uploaded files to openADS."""
        payload = []
        fwd_files = []

        for fid in file_ids:
            self.debug("upload_user_files() ForwardFile file_id: %s", fid)
            fwd_file = ForwardFile.objects.get(id=fid)
            if fwd_file:
                self.debug("upload_user_files() got ForwardFile")
                payload.append({
                    'filename'     : fwd_file.orig_filename + ('.pdf' if fwd_file.orig_filename[-4:] != '.pdf' else ''),
                    'content_type' : fwd_file.content_type,
                    'b64_content'  : base64.b64encode(fwd_file.upload_file.read()),
                    'file_type'    : fwd_file.type_fichier
                })
                self.debug("upload_user_files() payload added")
                fwd_file.upload_status = 'uploading'
                if fwd_file.upload_msg and re.search(r'^attempt \d+$', fwd_file.upload_msg):
                    self.debug("upload_user_files() upload_msg: '%s'", fwd_file.upload_msg)
                    attempt_num = fwd_file.upload_msg.replace('attempt ', '').strip()
                    self.debug("upload_user_files() attempt_num: '%s'", attempt_num)
                    fwd_file.upload_msg    = 'attempt %s' % attempt_num
                else:
                    fwd_file.upload_msg    = 'attempt 1'
                self.debug("upload_user_files() ForwardFile ready to be saved")
                fwd_file.save()
                fwd_files.append(fwd_file)
            else:
                self.logger.warning("upload_user_files() failed to find ForwardFile file_id: %s", fid);

        if payload:
            self.debug("upload_user_files() payload is not empty")
            debug_payload = copy.deepcopy(payload)
            for p in debug_payload:
                if 'b64_content' in p:
                    p['b64_content'] = '<b64 content>'
            self.debug("upload_user_files() payload is: %s", str(debug_payload))
            url = urlparse.urljoin(self.openADS_API_url, '/dossier/%s/%s/files' % (type_dossier, numero_dossier))
            response = self.requests.post(
                url,
                json=payload,
                auth=(self.openADS_API_login, self.openADS_API_password),
                timeout=self.openADS_API_timeout
            )
            if response.status_code // 100 != 2:
                for fwd_file in fwd_files:
                    fwd_file.upload_status = 'failed'
                    fwd_file.upload_msg    = self.get_response_error(response)
                    fwd_file.save()
                self.logger.warning(
                    "upload_user_files() openADS response is not OK (code: %s) for dossier '%s' and files '%s'",
                    response.status_code,
                    numero_dossier,
                    ','.join(file_ids)
                )
            else:
                try:
                    result = response.json()
                except ValueError:
                    for fwd_file in fwd_files:
                        fwd_file.upload_status = 'failed'
                        fwd_file.upload_msg    = 'No JSON content returned: %r' % response.content[:1000]
                        fwd_file.save()
                    self.logger.warning(
                        "upload_user_files() openADS response is not JSON valid for dossier '%s' and files '%s'",
                        numero_dossier,
                        ','.join([f.id for f in fwd_files])
                    )
                else:
                    # TODO handle response (now its just an informational sentence in key 'data')
                    for fwd_file in fwd_files:
                        fwd_file.upload_status = 'success'
                        fwd_file.upload_msg    = 'uploaded successfuly'
                        # delete file content (on success)
                        fpath = fwd_file.upload_file.path
                        fwd_file.upload_file.delete()
                        fwd_file.save()
                        self.debug(
                            "upload_user_files() flaging file '%s' has transfered (deleted '%s')",
                            fwd_file.id,
                            fpath
                        )
        else:
            self.logger.warning(
                "upload_user_files() payload is empty for dossier '%s' and files '%s'",
                numero_dossier,
                ','.join(file_ids)
            )


    # copy-pasted from 'wcs/qommon/misc.py'
    def file_digest(self, content, chunk_size=100000):
        """Return a hash for the content specified."""
        digest = hashlib.sha256()
        content.seek(0)
        def read_chunk():
            return content.read(chunk_size)
        for chunk in iter(read_chunk, ''):
            digest.update(chunk)
        return digest.hexdigest()