84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
from ZODB.POSException import ConflictError
|
|
from five import grok
|
|
from Products.CMFCore.utils import getToolByName
|
|
from plone.indexer import indexer
|
|
|
|
from zope.component import getUtility
|
|
from zope.schema.interfaces import IVocabularyFactory
|
|
|
|
from pfwbged.folder import IFolder
|
|
from collective.dms.basecontent.dmsdocument import IDmsDocument
|
|
|
|
from plone import api
|
|
|
|
|
|
from .behaviors import IPfwbDocument
|
|
|
|
@indexer(IPfwbDocument)
def document_dynamic_searchable_text_indexer(obj):
    """Build the SearchableText value of a document.

    The value is assembled from:

    * the document title;
    * the text/plain rendition of the contained files: every
      ``dmsappendixfile`` and only the first convertible ``dmsmainfile``
      met when walking the children in reverse order;
    * the indexation words of each thesaurus keyword set on the document.

    :param obj: the document being indexed.
    :returns: the non-empty elements joined with single spaces (unicode).
    """
    indexed_elements = [obj.title]

    transforms = getToolByName(obj, 'portal_transforms')

    had_version = False
    # Children are walked in reverse so that the first main file met is the
    # latest one (assumes the container lists oldest first -- TODO confirm).
    for child in reversed(obj.values()):
        if child.portal_type not in ('dmsmainfile', 'dmsappendixfile'):
            continue

        if not child.file or child.file.getSize() == 0:
            continue

        # if there is no path to text/plain, do nothing
        if not transforms._findPath(child.file.contentType, 'text/plain'):
            continue

        # Only index the latest version. The flag is restricted to main
        # files: if an appendix could set it, an appendix met before the
        # latest version would prevent any main file from being indexed.
        if child.portal_type == 'dmsmainfile':
            if had_version:
                continue
            had_version = True

        # convert it to text/plain
        try:
            datastream = transforms.convertTo(
                'text/plain', child.file.data,
                mimetype=child.file.contentType,
                filename=child.file.filename.encode('utf-8'))
            # convertTo() is documented to return None when it finds no
            # transform path; skip the file instead of failing on getData().
            if datastream is not None:
                indexed_elements.append(
                    unicode(datastream.getData(), 'utf-8', 'ignore'))
        except (ConflictError, KeyboardInterrupt):
            # Only these two are named and they are re-raised untouched;
            # any other conversion error propagates to the caller as well.
            raise

    if hasattr(obj, 'keywords') and obj.keywords:
        try:
            thesaurus = api.portal.get()['thesaurus']
        except KeyError:
            # No thesaurus in this portal: keywords add nothing to the
            # text, same tolerance as the thesaurus_keywords indexer.
            thesaurus = None

        if thesaurus is not None:
            for keyword in obj.keywords:
                try:
                    indexed_elements.append(
                        thesaurus[keyword].get_words_for_indexation())
                except LookupError:
                    # keyword not (or no longer) in the thesaurus
                    continue

    return u' '.join([x for x in indexed_elements if x])
|
|
|
|
# Register the indexer above as the global adapter named 'SearchableText'.
grok.global_adapter(
    document_dynamic_searchable_text_indexer, name='SearchableText')
|
|
|
|
|
|
@indexer(IDmsDocument)
def thesaurus_keywords(obj, **kw):
    """Return the keywords of a document, expanded through the thesaurus.

    The result starts with the non-empty keywords set on ``obj``; for each
    of them found in the portal's ``thesaurus`` object, the items returned
    by its ``get_keyword_tree()`` are appended.

    :param obj: the document being indexed.
    :param kw: extra keyword arguments, accepted and ignored.
    :returns: a list; empty when the document has no keywords.
    """
    if not hasattr(obj, 'keywords') or not obj.keywords:
        return []

    selected = [term for term in obj.keywords if term]

    try:
        thesaurus = api.portal.get()['thesaurus']
    except KeyError:
        # no thesaurus in the portal: only the raw keywords are indexed
        return selected

    # Iterate the keywords as set on the document, collect into a copy.
    expanded = list(selected)
    for term in selected:
        try:
            expanded.extend(thesaurus[term].get_keyword_tree())
        except KeyError:
            # keyword unknown to the thesaurus: keep it without expansion
            continue
    return expanded
|
|
# Register the indexer above as the global adapter named
# 'thesaurus_keywords'.
grok.global_adapter(
    thesaurus_keywords,
    name='thesaurus_keywords')
|