2012-09-20 10:36:48 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
2012-06-14 01:22:04 +02:00
|
|
|
# https://github.com/mfenniak/pyPdf
|
2012-07-02 23:57:37 +02:00
|
|
|
from collections import defaultdict
|
2014-01-20 17:14:50 +01:00
|
|
|
import logging
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
2012-07-02 23:57:37 +02:00
|
|
|
|
2012-06-14 01:22:04 +02:00
|
|
|
from pyPdf import PdfFileReader
|
2012-06-29 18:13:46 +02:00
|
|
|
from pyPdf.utils import PyPdfError
|
2012-06-14 01:22:04 +02:00
|
|
|
|
2014-01-23 15:17:02 +01:00
|
|
|
import magic
|
|
|
|
|
2012-07-02 23:57:37 +02:00
|
|
|
from ..base.models import Request
|
|
|
|
from ..utils import cache_to_django
|
2012-06-14 01:22:04 +02:00
|
|
|
|
2012-09-20 10:36:48 +02:00
|
|
|
# Identity function wrapped around the user-facing error messages in this
# module. Presumably a no-op marker so that translation tooling can extract
# the strings without translating them at import time -- TODO confirm.
N_ = lambda x: x
|
|
|
|
|
2012-10-02 22:49:24 +02:00
|
|
|
def check_pdf(filelike):
    """Check that *filelike* can be opened as a usable PDF document.

    The file is parsed with ``PdfFileReader``. Encrypted documents are only
    tolerated when they can be unlocked with the empty password (that is,
    when ``decrypt('')`` returns ``1``).

    Args:
        filelike: object handed as-is to ``PdfFileReader``.

    Raises:
        ValueError: if the parser cannot open the file, if the document is
            encrypted and not unlocked by the empty password, or if a
            ``PyPdfError`` is raised while inspecting the document.
    """
    try:
        try:
            pdf_file = PdfFileReader(filelike)
        except Exception:
            # Any parser failure is logged with its traceback and reported as
            # an invalid file. ``Exception`` rather than a bare ``except`` so
            # that KeyboardInterrupt/SystemExit are not swallowed.
            logger.exception('Invalid pdf file')
            raise ValueError(N_(u'Ce fichier PDF est invalide'))
        if pdf_file.isEncrypted and pdf_file.decrypt('') != 1:
            raise ValueError(N_(u'Les fichiers PDF chiffrés ne sont pas acceptés.'))
        # Evaluated only for its side effects: presumably forces the metadata
        # to be read so that a broken document raises PyPdfError here.
        pdf_file.documentInfo
    except PyPdfError:
        raise ValueError(N_(u"Vous ne pouvez envoyer que des fichiers PDF."))
|
|
|
|
|
2012-06-14 01:22:04 +02:00
|
|
|
def fill_document_attributes_from_pdf_file(document, docfile):
    """Fill attributes of *document* from the content of *docfile*.

    The MIME type is detected from the file content with ``magic``:

    * ``application/pdf``: the file is parsed with ``PdfFileReader``.
      ``document.name`` is set to the non-empty PDF title and subject joined
      by ``' - '`` and truncated to 60 characters, and ``document.nb_pages``
      to the page count (``0`` when the count is falsy). When the PDF exposes
      no document info, neither attribute is touched.
    * ``image/jpeg``: accepted, no attribute is set.
    * any other type: rejected with ``ValueError``.

    Nothing happens when *docfile* is falsy.

    Args:
        document: object receiving the ``name`` and ``nb_pages`` attributes.
        docfile: file object providing ``open()``, ``read()`` and ``seek()``.

    Raises:
        ValueError: if the type is neither PDF nor JPEG, if the PDF cannot
            be parsed, if it is encrypted and not unlocked by the empty
            password, or if a ``PyPdfError`` is raised while reading it.
    """
    if not docfile:
        return
    try:
        docfile.open()
        mime_type = magic.from_buffer(docfile.read(), mime=True)
        # Rewind: the whole content was consumed for type detection and the
        # PDF parser needs to start from the beginning.
        docfile.seek(0)

        if mime_type == 'image/jpeg':
            return
        if mime_type != 'application/pdf':
            raise ValueError(N_(u"Ce type de fichier n'est pas accepté"))

        # From here on the file is a PDF. Handling it after the guards above
        # ensures a successfully parsed PDF never reaches the
        # unsupported-type error.
        try:
            pdf_file = PdfFileReader(docfile)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not reported as a bad file.
            logger.exception('Invalid pdf file')
            raise ValueError(N_(u'Ce fichier PDF est invalide'))
        if pdf_file.isEncrypted and pdf_file.decrypt('') != 1:
            raise ValueError(N_(u'Les fichiers PDF chiffrés ne sont pas acceptés.'))

        pdf_info = pdf_file.documentInfo
        if pdf_info is None:
            return
        titles = [part for part in (pdf_info.title, pdf_info.subject) if part]
        document.name = u' - '.join(titles)[:60]
        document.nb_pages = pdf_file.numPages or 0
    except PyPdfError:
        raise ValueError(N_(u"Vous ne pouvez envoyer que des fichiers PDF."))
|
2012-07-02 23:57:37 +02:00
|
|
|
|
|
|
|
@cache_to_django(timeout=300)
def field_completion(user, field_names, without_scores=True):
    """Collect the values *user* already entered for the given fields.

    Every ``Request`` row filtered on ``user`` is scanned and, for each name
    in *field_names*, the number of occurrences of each non-``None`` value is
    counted.

    Args:
        user: value used to filter ``Request`` objects (``user=user``).
        field_names: sequence of ``Request`` field names to inspect.
        without_scores: when true (default) each entry is a bare value;
            otherwise each entry is a ``(value, count)`` pair.

    Returns:
        A ``defaultdict`` mapping each field name that has at least one
        non-``None`` value to a list of entries. The list is sorted by
        ascending count; its first three entries keep that order and the
        remaining ones are re-sorted by natural order.

    The result is wrapped by ``cache_to_django(timeout=300)``.
    """
    scores = defaultdict(lambda: defaultdict(lambda: 0))
    rows = Request.objects.filter(user=user).values_list(*field_names)
    for values in rows:
        for field_name, value in zip(field_names, values):
            if value is not None:
                scores[field_name][value] += 1

    # Iterate over a snapshot of the keys: each per-field counter is replaced
    # by its final list below. ``list(scores)`` and the comprehension behave
    # the same on Python 2 and also run on Python 3, unlike ``iteritems()``
    # and slicing the result of ``map()``.
    for field_name in list(scores):
        # NOTE(review): the sort is ascending, so the three leading entries
        # are the *least* frequent values -- confirm this is intended.
        ranked = sorted(scores[field_name].items(), key=lambda item: item[1])
        if without_scores:
            ranked = [value for value, _count in ranked]
        scores[field_name] = ranked[:3] + sorted(ranked[3:])
    return scores
|
|
|
|
|