This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
pfwbged.basecontent/src/pfwbged/basecontent/indexers.py

84 lines
2.6 KiB
Python

from ZODB.POSException import ConflictError
from five import grok
from Products.CMFCore.utils import getToolByName
from plone.indexer import indexer
from zope.component import getUtility
from zope.schema.interfaces import IVocabularyFactory
from pfwbged.folder import IFolder
from collective.dms.basecontent.dmsdocument import IDmsDocument
from plone import api
from .behaviors import IPfwbDocument
@indexer(IPfwbDocument)
def document_dynamic_searchable_text_indexer(obj):
    """Build the ``SearchableText`` value for a document.

    The returned unicode string is the space-separated concatenation of:

    * the document title;
    * the ``text/plain`` rendering of every non-empty ``dmsappendixfile``
      child and of a single ``dmsmainfile`` child (the first convertible
      one met while walking ``obj.values()`` in reverse), for files whose
      content type ``portal_transforms`` can turn into plain text;
    * for each keyword set on the document, the value returned by the
      matching thesaurus entry's ``get_words_for_indexation()``.

    Empty elements are dropped before joining.  A missing ``thesaurus``
    object in the portal, or a keyword without a thesaurus entry, is
    silently skipped.  Errors raised by the conversion itself are not
    caught here and propagate to the caller.
    """
    indexed_elements = [obj.title]
    transforms = getToolByName(obj, 'portal_transforms')

    had_version = False
    # NOTE(review): walking in reverse presumably yields the newest child
    # first (container order == creation order) -- confirm.
    for child in reversed(obj.values()):
        if child.portal_type not in ('dmsmainfile', 'dmsappendixfile'):
            continue
        if not child.file or child.file.getSize() == 0:
            continue

        # if there is no path to text/plain, do nothing
        if not transforms._findPath(child.file.contentType, 'text/plain'):
            continue

        if child.portal_type == 'dmsmainfile':
            # only index the latest version; appendices must not use up
            # the "latest version" slot, so the flag is set by main files
            # only.
            if had_version:
                continue
            had_version = True

        filename = child.file.filename
        if filename is not None:
            filename = filename.encode('utf-8')

        # convert it to text/plain
        datastream = transforms.convertTo(
            'text/plain', child.file.data,
            mimetype=child.file.contentType,
            filename=filename)
        if datastream is None:
            # the transform engine could not produce a result
            continue
        indexed_elements.append(
            unicode(datastream.getData(), 'utf-8', 'ignore'))

    keywords = getattr(obj, 'keywords', None)
    if keywords:
        try:
            thesaurus = api.portal.get()['thesaurus']
        except KeyError:
            # no thesaurus in this portal: index without keyword words
            thesaurus = None
        if thesaurus is not None:
            for keyword in keywords:
                try:
                    indexed_elements.append(
                        thesaurus[keyword].get_words_for_indexation())
                except LookupError:
                    # keyword has no entry in the thesaurus
                    continue

    return u' '.join([x for x in indexed_elements if x])


# Expose the indexer as the adapter named after the catalog index it feeds.
grok.global_adapter(document_dynamic_searchable_text_indexer,
                    name='SearchableText')
@indexer(IDmsDocument)
def thesaurus_keywords(obj, **kw):
    """Return the keywords to index for a document.

    The list starts with the non-empty values of ``obj.keywords``.  When the
    portal contains a ``thesaurus`` object, the items returned by
    ``get_keyword_tree()`` of each matching thesaurus entry are appended
    after them.  Keywords without a thesaurus entry contribute nothing
    extra.

    An empty list is returned when the document has no ``keywords``
    attribute or when that attribute is empty.
    """
    if not (hasattr(obj, 'keywords') and obj.keywords):
        return []

    collected = [value for value in obj.keywords if value]

    try:
        thesaurus = api.portal.get()['thesaurus']
    except KeyError:
        # No thesaurus available: only the document's own keywords.
        return collected

    # Walk a snapshot so the entries appended below are not visited again.
    for own_keyword in list(collected):
        try:
            collected.extend(thesaurus[own_keyword].get_keyword_tree())
        except KeyError:
            pass
    return collected


# Expose the indexer as the adapter named 'thesaurus_keywords'.
grok.global_adapter(thesaurus_keywords, name='thesaurus_keywords')