This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
collective.dms.batchimport/src/collective/dms/batchimport/batchimport.py

178 lines
6.7 KiB
Python
Raw Normal View History

2013-01-09 10:39:51 +01:00
import os
import logging
2013-01-09 11:16:11 +01:00
import json
2013-01-09 10:39:51 +01:00
2013-01-08 17:22:52 +01:00
from zope.interface import Interface
from zope import schema
2013-01-09 10:39:51 +01:00
from zope import component
2013-02-04 22:17:15 +01:00
from zope.component import queryUtility
2013-01-08 17:22:52 +01:00
2013-01-09 10:39:51 +01:00
from Products.CMFCore.utils import getToolByName
2013-01-08 17:22:52 +01:00
from Products.Five.browser import BrowserView
from plone.autoform.directives import widget
2013-01-09 10:39:51 +01:00
from plone.namedfile.file import NamedBlobFile
from plone.registry.interfaces import IRegistry
2013-02-04 22:17:15 +01:00
from plone.i18n.normalizer.interfaces import IIDNormalizer
from collective.z3cform.datagridfield import DataGridFieldFactory
from collective.z3cform.datagridfield.registry import DictRow
from collective.dms.mailcontent.dmsmail import internalReferenceIncomingMailDefaultValue, receptionDateDefaultValue
2013-01-08 17:22:52 +01:00
from plone.app.registry.browser import controlpanel
2013-01-08 17:22:52 +01:00
from . import _
from . import utils
2013-01-08 17:22:52 +01:00
2013-01-09 10:39:51 +01:00
# Module-level logger used by the batch import view to report
# configuration problems and per-file import failures.
log = logging.getLogger('collective.dms.batchimport')
class BatchImportError(Exception):
    """Raised when a single file cannot be imported.

    The batch importer catches this error per file, logs it and goes on
    with the next file.
    """
2013-01-08 17:22:52 +01:00
class ICodeTypeMapSchema(Interface):
    """Schema of one row of the code -> portal type mapping table."""

    # Code looked up at import time; the importer takes it from the part
    # of the file name that precedes the first '-'.
    code = schema.TextLine(
        title=_("Code"),
    )

    # Portal type of the document created for files carrying that code.
    portal_type = schema.TextLine(
        title=_("Portal Type"),
    )
2013-01-09 10:39:51 +01:00
2013-01-08 17:22:52 +01:00
class ISettings(Interface):
    """Registry settings driving the batch import."""

    # Directory that is walked to find the files to import.
    fs_root_directory = schema.TextLine(title=_("FS Root Directory"))

    # Directory where imported files are moved; when it is the same as
    # fs_root_directory, files are left in place.
    processed_fs_root_directory = schema.TextLine(
        title=_("FS Root Directory for processed files"),
    )

    # Rows following ICodeTypeMapSchema, edited through a data grid widget.
    code_to_type_mapping = schema.List(
        title=_("Code to Portal Type Mapping"),
        value_type=DictRow(
            title=_("Mapping"),
            schema=ICodeTypeMapSchema,
        ),
    )
    widget(code_to_type_mapping=DataGridFieldFactory)
2013-01-09 10:39:51 +01:00
2013-01-08 17:22:52 +01:00
class BatchImporter(BrowserView):
    """Import the files found under the configured directory into the portal.

    Calling the view walks ``ISettings.fs_root_directory``.  Each file is
    turned into a document whose portal type is chosen from the code that
    prefixes its name; a ``<name>.metadata`` JSON file sitting next to
    ``<name>`` is passed along as metadata.  Imported files are then moved
    under ``ISettings.processed_fs_root_directory``.
    """

    # NOTE: apart from the pre-existing docstring on convertTitleToId, the
    # methods below are documented with comments rather than docstrings on
    # purpose: Zope 2 only publishes objects that carry a docstring, and
    # these helpers are not meant to become reachable by URL.

    def __call__(self):
        # Run the import.  Returns 'ERROR' when the settings are unusable,
        # otherwise a summary string with the number of imported files and
        # the number of files that could not be imported.
        settings = component.getUtility(IRegistry).forInterface(ISettings, False)
        if not settings.fs_root_directory:
            log.warning('settings.fs_root_directory is not defined')
            return 'ERROR'
        if not os.path.exists(settings.fs_root_directory):
            log.warning('settings.fs_root_directory do not exist')
            return 'ERROR'
        if not settings.processed_fs_root_directory:
            # an unset value used to crash on .endswith(); report it like
            # the other configuration problems instead
            log.warning('settings.processed_fs_root_directory is not defined')
            return 'ERROR'

        self.fs_root_directory = self._with_trailing_slash(
            settings.fs_root_directory)
        self.processed_fs_root_directory = self._with_trailing_slash(
            settings.processed_fs_root_directory)

        # an unset mapping is treated as an empty one
        self.code_to_type_mapping = dict(
            (mapping['code'], mapping['portal_type'])
            for mapping in settings.code_to_type_mapping or ())

        nb_imports = 0
        nb_errors = 0
        for basename, dirnames, filenames in os.walk(self.fs_root_directory):
            # avoid folders beginning with . -- pruning dirnames in place
            # also keeps os.walk from descending into their subfolders
            dirnames[:] = [d for d in dirnames if not d.startswith('.')]

            # path of the current directory relative to the root directory
            foldername = basename[len(self.fs_root_directory):]
            metadata_filenames = [x for x in filenames
                                  if x.endswith('.metadata')]
            other_filenames = [x for x in filenames
                               if not x.endswith('.metadata')
                               and not x.startswith('.')]

            # first pass, handle metadata files
            for filename in metadata_filenames:
                metadata_filepath = os.path.join(basename, filename)
                imported_filename = os.path.splitext(filename)[0]
                filepath = os.path.join(basename, imported_filename)

                # the described file is dealt with here, whatever the
                # outcome, so it must not be picked up by the second pass;
                # it may be absent from the list (missing or hidden file)
                if imported_filename in other_filenames:
                    other_filenames.remove(imported_filename)

                try:
                    metadata = self._read_metadata(metadata_filepath)
                    self.import_one(filepath, foldername, metadata)
                except BatchImportError as e:
                    log.warning('error importing %s (%s)',
                                os.path.join(foldername, filename), str(e))
                    nb_errors += 1
                else:
                    self.mark_as_processed(metadata_filepath)
                    self.mark_as_processed(filepath)
                    nb_imports += 1

            # second pass, handle other files, creating individual documents
            for filename in other_filenames:
                filepath = os.path.join(basename, filename)
                try:
                    self.import_one(filepath, foldername)
                except BatchImportError as e:
                    log.warning('error importing %s (%s)',
                                os.path.join(foldername, filename), str(e))
                    nb_errors += 1
                else:
                    self.mark_as_processed(filepath)
                    nb_imports += 1

        return 'OK (%s imported files, %s unprocessed files)' % (
            nb_imports, nb_errors)

    def _with_trailing_slash(self, directory):
        # Return directory with a single '/' appended when it lacks one, so
        # that slicing paths by len(directory) yields relative paths.
        if directory.endswith('/'):
            return directory
        return directory + '/'

    def _read_metadata(self, metadata_filepath):
        # Load the JSON content of a .metadata file.  Read and parse errors
        # are turned into BatchImportError so that one broken file is
        # reported without aborting the whole batch.
        try:
            with open(metadata_filepath) as metadata_file:
                return json.load(metadata_file)
        except (IOError, OSError, ValueError) as e:
            raise BatchImportError('invalid metadata file (%s)' % e)

    def mark_as_processed(self, filepath):
        # Move filepath to the same relative location under the processed
        # directory, creating intermediate folders as needed.
        # if the processed folder is the same as the input folder, we dont
        # move files
        if self.processed_fs_root_directory == self.fs_root_directory:
            return
        relative_path = filepath[len(self.fs_root_directory):]
        processed_filepath = os.path.join(self.processed_fs_root_directory,
                                          relative_path)
        target_directory = os.path.dirname(processed_filepath)
        if not os.path.exists(target_directory):
            os.makedirs(target_directory)
        os.rename(filepath, processed_filepath)

    def get_folder(self, foldername):
        # Return the object found by following the '/'-separated parts of
        # foldername from the portal root; empty parts are ignored.
        # Raises AttributeError when a part cannot be resolved.
        # NOTE(review): getattr() may also resolve names that are not
        # contained objects -- confirm this is acceptable here.
        folder = getToolByName(self.context, 'portal_url').getPortalObject()
        for part in foldername.split('/'):
            if part:
                folder = getattr(folder, part)
        return folder

    def convertTitleToId(self, title):
        """Plug into plone's id-from-title machinery.
        """
        return queryUtility(IIDNormalizer).normalize(title)

    def import_one(self, filepath, foldername, metadata=None):
        # Create one document from the file at filepath inside the portal
        # folder matching foldername; metadata, when given, is handed over
        # to utils.createDocument.
        # Raises BatchImportError when the folder does not exist, when no
        # portal type is mapped to the code prefixing the file name, when
        # the target id is already taken or when the file cannot be read.
        filename = os.path.basename(filepath)
        try:
            folder = self.get_folder(foldername)
        except AttributeError:
            raise BatchImportError('directory structure mismatch')

        # the code is whatever precedes the first '-' of the file name
        code = filename.split('-', 1)[0]
        portal_type = self.code_to_type_mapping.get(code)
        if not portal_type:
            raise BatchImportError(
                "no portal type associated to this code '%s'" % code)

        document_id = self.convertTitleToId(os.path.splitext(filename)[0])
        # NOTE(review): hasattr() is also true for attributes that are not
        # contained documents -- confirm this check is strict enough.
        if hasattr(folder, document_id):
            raise BatchImportError('document already exists')

        # read the raw bytes and release the file handle right away
        try:
            with open(filepath, 'rb') as source:
                data = source.read()
        except (IOError, OSError) as e:
            raise BatchImportError('unable to read file (%s)' % e)

        document_file = NamedBlobFile(data, filename=unicode(filename))
        utils.createDocument(self, folder, portal_type, document_id,
                             filename, document_file, metadata=metadata)
2013-01-08 17:22:52 +01:00
class ControlPanelEditForm(controlpanel.RegistryEditForm):
    """Registry edit form exposing the ISettings fields."""

    label = _(u'Batch Import Settings')
    description = u''
    # interface whose fields make up the form
    schema = ISettings
class ControlPanel(controlpanel.ControlPanelFormWrapper):
    """Control panel wrapper around ControlPanelEditForm."""

    form = ControlPanelEditForm