--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pyams_thesaurus/thesaurus.py Tue Apr 14 17:52:05 2015 +0200
@@ -0,0 +1,397 @@
+#
+# Copyright (c) 2008-2015 Thierry Florac <tflorac AT ulthar.net>
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+
+__docformat__ = 'restructuredtext'
+
+
+# import standard library
+import re
+
+# import interfaces
+from pyams_security.interfaces import IDefaultProtectionPolicy
+from pyams_thesaurus.interfaces.extension import IThesaurusTermExtension
+from pyams_thesaurus.interfaces.loader import IThesaurusLoader
+from pyams_thesaurus.interfaces.term import IThesaurusTermsContainer, IThesaurusTerm, IThesaurusLoaderTerm
+from pyams_thesaurus.interfaces.thesaurus import IThesaurus, IThesaurusExtract, IThesaurusExtractRoles, \
+ IThesaurusRoles, IThesaurusExtracts
+from pyams_utils.interfaces.site import IOptionalUtility
+from pyams_utils.interfaces.tree import ITree
+from transaction.interfaces import ITransactionManager
+from zope.annotation.interfaces import IAnnotations
+from zope.interface.interfaces import ComponentLookupError
+from zope.lifecycleevent.interfaces import IObjectAddedEvent, IObjectRemovedEvent
+from zope.location.interfaces import ISublocations
+from zope.schema.interfaces import IVocabularyFactory
+from zope.traversing.interfaces import ITraversable
+
+# import packages
+from hypatia.catalog import Catalog
+from hypatia.query import Eq, Contains
+from hypatia.text.lexicon import Lexicon
+from hypatia.text.parsetree import QueryError
+from persistent import Persistent
+from pyams_catalog.index import TextIndexWithInterface, FieldIndexWithInterface
+from pyams_catalog.nltk import NltkStemmedTextProcessor
+from pyams_catalog.query import CatalogResultSet, or_
+from pyams_catalog.utils import index_object
+from pyams_security.security import ProtectedObject
+from pyams_security.property import RolePrincipalsFieldProperty
+from pyams_utils.adapter import adapter_config, ContextAdapter
+from pyams_utils.registry import query_utility, get_utilities_for
+from pyams_utils.request import check_request
+from pyams_utils.traversing import get_parent
+from pyams_utils.unicode import translate_string
+from pyramid.events import subscriber
+from pyramid.threadlocal import get_current_registry
+from zope.componentvocabulary.vocabulary import UtilityVocabulary
+from zope.container.btree import BTreeContainer
+from zope.container.contained import Contained
+from zope.interface import implementer, provider
+from zope.lifecycleevent import ObjectCreatedEvent, ObjectModifiedEvent, ObjectAddedEvent
+from zope.location import locate
+from zope.schema.fieldproperty import FieldProperty
+from zope.schema.vocabulary import getVocabularyRegistry, SimpleVocabulary, SimpleTerm
+
+
+# Detects queries which already contain search syntax (a glob, a double quote,
+# a parenthesis or a whitespace-delimited and/or/not keyword, whatever the case);
+# Thesaurus.find_terms() forwards such queries verbatim to the full-text index
+# instead of rebuilding them word by word.
+CUSTOM_SEARCH = re.compile(r'\*|\"|\sand\s|\sor\s|\snot\s|\(|\)', re.IGNORECASE)
+
+
+@implementer(IThesaurusTermsContainer)
+class ThesaurusTermsContainer(BTreeContainer):
+ """Thesaurus terms container"""
+
+ def clear(self):
+ self._SampleContainer__data.clear()
+ self._BTreeContainer__len.set(0)
+
+
+@implementer(IThesaurus, ISublocations, IDefaultProtectionPolicy, IOptionalUtility)
+class Thesaurus(ProtectedObject, Persistent, Contained):
+ """Thesaurus persistent class"""
+
+ __roles__ = ('thesaurus.Admin', 'thesaurus.Manager')
+ roles_interface = IThesaurusRoles
+
+ name = FieldProperty(IThesaurus['name'])
+ title = FieldProperty(IThesaurus['title'])
+ subject = FieldProperty(IThesaurus['subject'])
+ description = FieldProperty(IThesaurus['description'])
+ language = FieldProperty(IThesaurus['language'])
+ creator = FieldProperty(IThesaurus['creator'])
+ publisher = FieldProperty(IThesaurus['publisher'])
+ created = FieldProperty(IThesaurus['created'])
+
+ terms = None
+ _top_terms = FieldProperty(IThesaurus['top_terms'])
+ catalog = FieldProperty(IThesaurus['catalog'])
+
+ administrators = RolePrincipalsFieldProperty(IThesaurus['administrators'])
+ managers = RolePrincipalsFieldProperty(IThesaurus['managers'])
+
+ def __init__(self, name=None, description=None, terms=None):
+ if name:
+ self.name = name
+ if description:
+ self.title = description.title
+ self.subject = description.subject
+ self.description = description.description
+ self.language = description.language
+ self.creator = description.creator
+ self.publisher = description.publisher
+ self.created = description.created
+ if not IThesaurusTermsContainer.providedBy(terms):
+ terms = ThesaurusTermsContainer()
+ self.terms = terms
+ locate(terms, self, '++terms++')
+ self.reset_terms_parent()
+ self.reset_top_terms()
+
+ @property
+ def sublocations(self):
+ return self.terms, self.catalog
+
+ @property
+ def top_terms(self):
+ return self._top_terms
+
+ @top_terms.setter
+ def top_terms(self, value):
+ self._top_terms = [term for term in value or () if term.usage is None]
+
+ def init_catalog(self):
+ catalog = self.catalog = Catalog()
+ # Init fulltext search catalog
+ catalog['fulltext'] = TextIndexWithInterface(IThesaurusTerm, 'label', language=self.language)
+ # Init stemmed search catalog
+ catalog['stemmed'] = TextIndexWithInterface(IThesaurusTerm, 'label',
+ lexicon=Lexicon(NltkStemmedTextProcessor(language=self.language)))
+ # Init value search catalog
+ catalog['value'] = FieldIndexWithInterface(IThesaurusTerm, 'base_label')
+ for idx, term in enumerate(self.terms.values()):
+ index_object(term, catalog)
+ if not idx % 100:
+ try:
+ ITransactionManager(catalog).savepoint()
+ except TypeError:
+ # Can fail if thesaurus is not stored yet...
+ pass
+
+ def load(self, configuration):
+ loader = query_utility(IThesaurusLoader, name=configuration.format)
+ if loader is not None:
+ result = loader.load(configuration.data)
+ self.merge(configuration, result)
+
+ def merge(self, configuration, thesaurus=None):
+ if thesaurus is None:
+ loader = query_utility(IThesaurusLoader, name=configuration.format)
+ if loader is not None:
+ thesaurus = loader.load(configuration.data)
+ if thesaurus is not None:
+ # define or merge items from given thesaurus
+ terms = self.terms
+ for idx, (key, term) in enumerate(thesaurus.terms.items()):
+ # check for term conflict
+ if configuration.conflict_suffix:
+ suffixed_key = key + ' ' + configuration.conflict_suffix
+ if suffixed_key in terms:
+ key = suffixed_key
+ elif key in terms:
+ term.label = key
+ if key in terms:
+ terms[key].merge(term, configuration)
+ elif not IThesaurusLoaderTerm.providedBy(term):
+ terms[key] = term
+ if not idx % 100:
+ try:
+ ITransactionManager(self).savepoint()
+ except TypeError:
+ # Can fail if thesaurus is not stored yet...
+ pass
+ self.reset_terms_parent()
+ self.reset_top_terms()
+
+ def reset_terms_parent(self):
+ for idx, term in enumerate(self.terms.values()):
+ # reset generic/specifics attributes
+ generic = term.generic
+ if (generic is not None) and (term not in generic.specifics):
+ generic.specifics = generic.specifics + [term, ]
+ # reset term's first level parent
+ parent = term
+ while parent.generic is not None:
+ parent = parent.generic
+ term.parent = parent
+ if not idx % 100:
+ try:
+ ITransactionManager(self).savepoint()
+ except TypeError:
+ # Can fail if thesaurus is not stored yet...
+ pass
+
+ def reset_top_terms(self):
+ self.top_terms = [term for term in self.terms.values()
+ if (not term.generic) and (not term.usage)]
+
+ def clear(self):
+ self.terms.clear()
+ self.catalog.reset()
+ self.top_terms = []
+
+ def find_terms(self, query=None, extract=None, glob='end', limit=None,
+ exact=False, exact_only=False, stemmed=False):
+ assert exact or (not exact_only)
+ terms = []
+ if exact:
+ query_text = translate_string(query, escape_slashes=True, force_lower=True, spaces=' ')
+ terms = list(CatalogResultSet(Eq(self.catalog['value'], query_text)))
+ if not exact_only:
+ search = None
+ # check stemmed index
+ if stemmed and not re.search(r'\*', query):
+ search = Contains(self.catalog['stemmed'],
+ ' and '.join(m for m in query.split() if len(m) > 2))
+ # check basic index
+ start = ''
+ end = ''
+ if CUSTOM_SEARCH.search(query):
+ query_text = query
+ else:
+ if glob in ('start', 'both'):
+ # Starting glob is not supported!!
+ start = ''
+ if glob in ('end', 'both'):
+ end = '*'
+ query_text = ' and '.join(('{start}{mid}{end}'.format(start=start, mid=m, end=end)
+ for m in query.split() if len(m) > 2))
+ search = or_(search, Contains(self.catalog['fulltext'], query_text))
+ try:
+ terms += sorted(CatalogResultSet(search.execute()),
+ key=lambda x: x.label)
+ except QueryError:
+ pass
+ if extract:
+ terms = filter(lambda term: extract in term.extracts, terms)
+ if limit:
+ terms = terms[:limit]
+ return terms
+
+ def delete(self):
+ pass
+
+
+@subscriber(IObjectAddedEvent, context_selector=IThesaurus)
+def handle_added_thesaurus(event):
+ """Handle added thesaurus to init inner catalog"""
+ manager = event.newParent
+ manager.registerUtility(event.object, IThesaurus, name=event.object.name)
+ event.object.init_catalog()
+
+
+@subscriber(IObjectRemovedEvent, context_selector=IThesaurus)
+def handle_removed_thesaurus(event):
+ """Handle removed thesaurus"""
+ manager = event.oldParent
+ manager.unregisterUtility(event.object, IThesaurus, name=event.object.name)
+
+
+@adapter_config(context=IThesaurus, provides=ITree)
+class ThesaurusTreeAdapter(ContextAdapter):
+ """Thesaurus tree adapter"""
+
+ def get_root_nodes(self):
+ return self.context.top_terms
+
+
+#
+# Thesaurus extracts
+#
+
+# Annotations key under which the extracts container of a thesaurus is stored
+THESAURUS_EXTRACTS_KEY = 'pyams_thesaurus.extracts'
+
+
+@implementer(IThesaurusExtracts)
+class ThesaurusExtractsContainer(BTreeContainer):
+ """Thesaurus extracts container
+
+ BTree-based container providing IThesaurusExtracts; it adds nothing to its
+ base class.
+ """
+
+
+@adapter_config(context=IThesaurus, provides=IThesaurusExtracts)
+def ThesaurusExtractsFactory(context):
+ """Thesaurus extracts factory"""
+ annotations = IAnnotations(context)
+ extracts = annotations.get(THESAURUS_EXTRACTS_KEY)
+ if extracts is None:
+ extracts = annotations[THESAURUS_EXTRACTS_KEY] = ThesaurusExtractsContainer()
+ get_current_registry().notify(ObjectCreatedEvent(extracts))
+ locate(extracts, context, '++extracts++')
+ return extracts
+
+
+@adapter_config(name='extracts', context=IThesaurus, provides=ITraversable)
+class ThesaurusExtractsNamespace(ContextAdapter):
+ """Thesaurus ++extracts++ namespace"""
+
+ def traverse(self, name, furtherpath=None):
+ extracts = IThesaurusExtracts(self.context)
+ if name:
+ return extracts[name]
+ else:
+ return extracts
+
+
+@implementer(IThesaurusExtract)
+class ThesaurusExtract(Persistent, Contained):
+ """Thesaurus extract"""
+
+ __roles__ = ('thesaurus.ExtractManager', )
+ roles_interface = IThesaurusExtractRoles
+
+ name = FieldProperty(IThesaurusExtract['name'])
+ description = FieldProperty(IThesaurusExtract['description'])
+ abbreviation = FieldProperty(IThesaurusExtract['abbreviation'])
+ color = FieldProperty(IThesaurusExtract['color'])
+
+ managers = RolePrincipalsFieldProperty(IThesaurusExtract['managers'])
+
+ def add_term(self, term):
+ term.add_extract(self)
+
+ def remove_term(self, term):
+ term.remove_extract(self)
+
+
+@subscriber(IObjectRemovedEvent, context_selector=IThesaurusExtract)
+def handle_removed_extract(event):
+ """Handle removed extract"""
+ thesaurus = get_parent(event.object, IThesaurus)
+ name = event.object.name
+ for term in thesaurus.terms.values():
+ term.remove_extract(name, check=False)
+
+
+#
+# Vocabularies
+#
+
+@provider(IVocabularyFactory)
+class ThesaurusVocabulary(UtilityVocabulary):
+ """Thesaurus utilities vocabulary
+
+ Vocabulary of the utilities registered for IThesaurus; it is registered
+ below under the ``PyAMS thesaurus`` name.
+ """
+
+ interface = IThesaurus
+ # per zope.componentvocabulary, term values are then the utilities themselves
+ # instead of their names -- TODO confirm against the library documentation
+ nameOnly = False
+
+getVocabularyRegistry().register('PyAMS thesaurus', ThesaurusVocabulary)
+
+
+@provider(IVocabularyFactory)
+class ThesaurusNamesVocabulary(UtilityVocabulary):
+ """Thesaurus names utilities vocabulary
+
+ Vocabulary of the utilities registered for IThesaurus; it is registered
+ below under the ``PyAMS thesaurus names`` name.
+ """
+
+ interface = IThesaurus
+ # per zope.componentvocabulary, term values are then the utilities names
+ # instead of the utilities -- TODO confirm against the library documentation
+ nameOnly = True
+
+getVocabularyRegistry().register('PyAMS thesaurus names', ThesaurusNamesVocabulary)
+
+
+@provider(IVocabularyFactory)
+class ThesaurusExtractsVocabulary(SimpleVocabulary):
+ """Thesaurus extracts vocabulary"""
+
+ def __init__(self, context=None):
+ terms = []
+ if context is not None:
+ thesaurus = get_parent(context, IThesaurus)
+ if thesaurus is not None:
+ extracts = IThesaurusExtracts(thesaurus)
+ terms = [SimpleTerm(extract.__name__, title=extract.name) for extract in extracts.values()]
+ terms.sort(key=lambda x: x.title)
+ super(ThesaurusExtractsVocabulary, self).__init__(terms)
+
+getVocabularyRegistry().register('PyAMS thesaurus extracts', ThesaurusExtractsVocabulary)
+
+
+@provider(IVocabularyFactory)
+class ThesaurusTermExtensionsVocabulary(SimpleVocabulary):
+ """Thesaurus term extensions vocabulary"""
+
+ interface = IThesaurusTermExtension
+
+ def __init__(self, context=None):
+ translate = check_request().localizer.translate
+ try:
+ terms = [SimpleTerm(name, title=translate(util.label))
+ for name, util in get_utilities_for(self.interface)]
+ except ComponentLookupError:
+ terms = []
+ super(ThesaurusTermExtensionsVocabulary, self).__init__(terms)
+
+getVocabularyRegistry().register('PyAMS thesaurus extensions', ThesaurusTermExtensionsVocabulary)