178 lines
6.7 KiB
Python
178 lines
6.7 KiB
Python
import os
|
|
import logging
|
|
import json
|
|
|
|
from zope.interface import Interface
|
|
from zope import schema
|
|
from zope import component
|
|
from zope.component import queryUtility
|
|
|
|
from Products.CMFCore.utils import getToolByName
|
|
from Products.Five.browser import BrowserView
|
|
|
|
from plone.autoform.directives import widget
|
|
from plone.namedfile.file import NamedBlobFile
|
|
from plone.registry.interfaces import IRegistry
|
|
from plone.i18n.normalizer.interfaces import IIDNormalizer
|
|
from collective.z3cform.datagridfield import DataGridFieldFactory
|
|
from collective.z3cform.datagridfield.registry import DictRow
|
|
from collective.dms.mailcontent.dmsmail import internalReferenceIncomingMailDefaultValue, receptionDateDefaultValue
|
|
|
|
from plone.app.registry.browser import controlpanel
|
|
|
|
from . import _
|
|
from . import utils
|
|
|
|
# Module-level logger used by the batch import code below.
log = logging.getLogger('collective.dms.batchimport')
|
|
|
|
|
|
class BatchImportError(Exception):
    """Signal that a single file could not be imported.

    Raised by ``BatchImporter.import_one`` and caught by
    ``BatchImporter.__call__``, which logs it and counts the file as
    unprocessed instead of aborting the whole run.
    """
|
|
|
|
|
|
class ICodeTypeMapSchema(Interface):
    """One row of the code-to-portal-type mapping table."""

    # Code looked up by the importer; it is the part of an imported file's
    # name that precedes the first '-'.
    code = schema.TextLine(
        title=_("Code"),
    )

    # Portal type used to create the document for files carrying that code.
    portal_type = schema.TextLine(
        title=_("Portal Type"),
    )
|
|
|
|
|
|
class ISettings(Interface):
    """Settings read by the batch importer from the registry."""

    # Directory tree that the importer walks looking for files to import.
    fs_root_directory = schema.TextLine(title=_("FS Root Directory"))

    # Directory tree that successfully imported files are moved into; when
    # it equals ``fs_root_directory`` the files are left where they are.
    processed_fs_root_directory = schema.TextLine(
        title=_("FS Root Directory for processed files"),
    )

    # Rows of ICodeTypeMapSchema, edited through a data grid widget.
    code_to_type_mapping = schema.List(
        title=_("Code to Portal Type Mapping"),
        value_type=DictRow(
            title=_("Mapping"),
            schema=ICodeTypeMapSchema,
        ),
    )
    widget(code_to_type_mapping=DataGridFieldFactory)
|
|
|
|
|
|
class BatchImporter(BrowserView):
    # Browser view that imports files from a filesystem directory tree into
    # the portal.
    #
    # Calling the view walks ``fs_root_directory`` (taken from the ISettings
    # registry records).  The path of each directory, relative to that root,
    # is resolved to the object of the same path under the portal root, and
    # each file is turned into one document through ``utils.createDocument``.
    # A file named ``<name>.metadata`` holds JSON metadata for its sibling
    # file ``<name>``.
    #
    # NOTE(review): methods are documented with comments rather than
    # docstrings on purpose.  Zope 2's publisher only traverses to attributes
    # that carry a docstring, so adding one could make a helper reachable by
    # URL -- confirm before converting these comments to docstrings.

    def __call__(self):
        # Run one import pass over the configured directory.
        #
        # Returns 'ERROR' (after logging a warning) when the directory
        # settings are unusable, otherwise an 'OK (...)' summary giving the
        # number of imported files and of files that failed to import.
        settings = component.getUtility(IRegistry).forInterface(ISettings, False)

        if not settings.fs_root_directory:
            log.warning('settings.fs_root_directory is not defined')
            return 'ERROR'

        if not os.path.exists(settings.fs_root_directory):
            log.warning('settings.fs_root_directory do not exist')
            return 'ERROR'

        # An unset value would either crash on .endswith() (None) or turn
        # into '/' (empty string) and move processed files to the filesystem
        # root, so it is rejected like an unset fs_root_directory.
        if not settings.processed_fs_root_directory:
            log.warning('settings.processed_fs_root_directory is not defined')
            return 'ERROR'

        # Both roots are normalised to end with '/' so that slicing a path by
        # len(root) yields a path relative to the root.
        self.fs_root_directory = settings.fs_root_directory
        if not self.fs_root_directory.endswith('/'):
            self.fs_root_directory = self.fs_root_directory + '/'

        self.processed_fs_root_directory = settings.processed_fs_root_directory
        if not self.processed_fs_root_directory.endswith('/'):
            self.processed_fs_root_directory = self.processed_fs_root_directory + '/'

        # The mapping record may be unset (None); treat that as "no mapping".
        self.code_to_type_mapping = dict()
        for mapping in settings.code_to_type_mapping or ():
            self.code_to_type_mapping[mapping['code']] = mapping['portal_type']

        nb_imports = 0
        nb_errors = 0

        for basename, dirnames, filenames in os.walk(self.fs_root_directory):
            # avoid folders beginning with . -- pruning the list in place
            # also keeps os.walk from descending into their sub-folders.
            dirnames[:] = [name for name in dirnames if not name.startswith('.')]

            foldername = basename[len(self.fs_root_directory):]
            metadata_filenames = [x for x in filenames if x.endswith('.metadata')]
            other_filenames = [x for x in filenames
                               if not x.endswith('.metadata') and not x.startswith('.')]

            # first pass, handle metadata files
            for filename in metadata_filenames:
                metadata_filepath = os.path.join(basename, filename)
                with open(metadata_filepath) as metadata_file:
                    metadata = json.load(metadata_file)

                imported_filename = os.path.splitext(filename)[0]
                filepath = os.path.join(basename, imported_filename)

                if self._import_file(filepath, foldername, filename, metadata):
                    self.mark_as_processed(metadata_filepath)
                    self.mark_as_processed(filepath)
                    nb_imports += 1
                else:
                    nb_errors += 1

                # Whatever the outcome, the data file must not be imported a
                # second time without its metadata.  It may be absent from
                # the list (missing on disk, or dot-prefixed), hence the
                # membership test.
                if imported_filename in other_filenames:
                    other_filenames.remove(imported_filename)

            # second pass, handle other files, creating individual documents
            for filename in other_filenames:
                filepath = os.path.join(basename, filename)
                if self._import_file(filepath, foldername, filename):
                    self.mark_as_processed(filepath)
                    nb_imports += 1
                else:
                    nb_errors += 1

        return 'OK (%s imported files, %s unprocessed files)' % (nb_imports, nb_errors)

    def _import_file(self, filepath, foldername, reported_filename, metadata=None):
        # Import one file, logging instead of propagating BatchImportError.
        #
        # ``reported_filename`` is the name shown in the warning (the
        # metadata file name during the first pass).  Returns True when the
        # import succeeded and False when it raised BatchImportError; any
        # other exception propagates.
        try:
            self.import_one(filepath, foldername, metadata)
        except BatchImportError as e:
            log.warning('error importing %s (%s)',
                        os.path.join(foldername, reported_filename), str(e))
            return False
        return True

    def mark_as_processed(self, filepath):
        # Move ``filepath`` to the same relative location under the
        # processed root, creating the target directory when needed.
        #
        # if the processed folder is the same as the input folder, we dont move files
        if self.processed_fs_root_directory == self.fs_root_directory:
            return
        relative_path = filepath[len(self.fs_root_directory):]
        processed_filepath = os.path.join(self.processed_fs_root_directory,
                                          relative_path)
        processed_dirname = os.path.dirname(processed_filepath)
        if not os.path.exists(processed_dirname):
            os.makedirs(processed_dirname)
        os.rename(filepath, processed_filepath)

    def get_folder(self, foldername):
        # Resolve the '/'-separated ``foldername`` to an object, starting
        # from the portal root and following one attribute per path part.
        # Empty parts are ignored; a missing part raises AttributeError.
        folder = getToolByName(self.context, 'portal_url').getPortalObject()
        for part in foldername.split('/'):
            if part:
                folder = getattr(folder, part)
        return folder

    def convertTitleToId(self, title):
        """Plug into plone's id-from-title machinery.
        """
        return queryUtility(IIDNormalizer).normalize(title)

    def import_one(self, filepath, foldername, metadata=None):
        # Create one document from the file at ``filepath`` inside the
        # object that ``foldername`` resolves to.
        #
        # The portal type comes from the code-to-type mapping, keyed by the
        # part of the file name before the first '-'.  The document id is
        # the normalised file name without its extension.  ``metadata`` is
        # handed to ``utils.createDocument`` unchanged.
        #
        # Raises BatchImportError when the folder cannot be resolved, when
        # no portal type is mapped to the code, or when the folder already
        # has an attribute named like the document id.
        filename = os.path.basename(filepath)
        try:
            folder = self.get_folder(foldername)
        except AttributeError:
            raise BatchImportError('directory structure mismatch')

        code = filename.split('-', 1)[0]
        portal_type = self.code_to_type_mapping.get(code)
        if not portal_type:
            raise BatchImportError("no portal type associated to this code '%s'" % code)

        document_id = self.convertTitleToId(os.path.splitext(filename)[0])

        if hasattr(folder, document_id):
            raise BatchImportError('document already exists')

        # The payload is stored as a blob: read it as raw bytes and close the
        # handle as soon as the content is in memory.
        with open(filepath, 'rb') as source_file:
            data = source_file.read()

        document_file = NamedBlobFile(data, filename=unicode(filename))
        utils.createDocument(self, folder, portal_type, document_id,
                             filename, document_file, metadata=metadata)
|
|
|
|
|
|
class ControlPanelEditForm(controlpanel.RegistryEditForm):
    # Edit form for the batch import settings; its fields come from the
    # ISettings schema.

    schema = ISettings

    # Heading and (empty) description set on the form.
    label = _(u'Batch Import Settings')
    description = u''
|
|
|
|
|
|
class ControlPanel(controlpanel.ControlPanelFormWrapper):
    # Control panel wrapper around the batch import settings form.

    form = ControlPanelEditForm
|