src/pyams_thesaurus/thesaurus.py
changeset 0 47700a43ef3f
child 4 5dc0c1fa6af6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pyams_thesaurus/thesaurus.py	Tue Apr 14 17:52:05 2015 +0200
@@ -0,0 +1,397 @@
+#
+# Copyright (c) 2008-2015 Thierry Florac <tflorac AT ulthar.net>
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+
+__docformat__ = 'restructuredtext'
+
+
+# import standard library
+import re
+
+# import interfaces
+from pyams_security.interfaces import IDefaultProtectionPolicy
+from pyams_thesaurus.interfaces.extension import IThesaurusTermExtension
+from pyams_thesaurus.interfaces.loader import IThesaurusLoader
+from pyams_thesaurus.interfaces.term import IThesaurusTermsContainer, IThesaurusTerm, IThesaurusLoaderTerm
+from pyams_thesaurus.interfaces.thesaurus import IThesaurus, IThesaurusExtract, IThesaurusExtractRoles, \
+    IThesaurusRoles, IThesaurusExtracts
+from pyams_utils.interfaces.site import IOptionalUtility
+from pyams_utils.interfaces.tree import ITree
+from transaction.interfaces import ITransactionManager
+from zope.annotation.interfaces import IAnnotations
+from zope.interface.interfaces import ComponentLookupError
+from zope.lifecycleevent.interfaces import IObjectAddedEvent, IObjectRemovedEvent
+from zope.location.interfaces import ISublocations
+from zope.schema.interfaces import IVocabularyFactory
+from zope.traversing.interfaces import ITraversable
+
+# import packages
+from hypatia.catalog import Catalog
+from hypatia.query import Eq, Contains
+from hypatia.text.lexicon import Lexicon
+from hypatia.text.parsetree import QueryError
+from persistent import Persistent
+from pyams_catalog.index import TextIndexWithInterface, FieldIndexWithInterface
+from pyams_catalog.nltk import NltkStemmedTextProcessor
+from pyams_catalog.query import CatalogResultSet, or_
+from pyams_catalog.utils import index_object
+from pyams_security.security import ProtectedObject
+from pyams_security.property import RolePrincipalsFieldProperty
+from pyams_utils.adapter import adapter_config, ContextAdapter
+from pyams_utils.registry import query_utility, get_utilities_for
+from pyams_utils.request import check_request
+from pyams_utils.traversing import get_parent
+from pyams_utils.unicode import translate_string
+from pyramid.events import subscriber
+from pyramid.threadlocal import get_current_registry
+from zope.componentvocabulary.vocabulary import UtilityVocabulary
+from zope.container.btree import BTreeContainer
+from zope.container.contained import Contained
+from zope.interface import implementer, provider
+from zope.lifecycleevent import ObjectCreatedEvent, ObjectModifiedEvent, ObjectAddedEvent
+from zope.location import locate
+from zope.schema.fieldproperty import FieldProperty
+from zope.schema.vocabulary import getVocabularyRegistry, SimpleVocabulary, SimpleTerm
+
+
+# Detects queries which already use an explicit search syntax: a "*" wildcard,
+# a double quote, parentheses, or one of the "and" / "or" / "not" keywords
+# surrounded by whitespace (case-insensitive).
+# Thesaurus.find_terms() forwards such queries unchanged to the full-text index.
+CUSTOM_SEARCH = re.compile(r'\*|\"|\sand\s|\sor\s|\snot\s|\(|\)', re.IGNORECASE)
+
+
+@implementer(IThesaurusTermsContainer)
+class ThesaurusTermsContainer(BTreeContainer):
+    """Thesaurus terms container"""
+
+    def clear(self):
+        self._SampleContainer__data.clear()
+        self._BTreeContainer__len.set(0)
+
+
+@implementer(IThesaurus, ISublocations, IDefaultProtectionPolicy, IOptionalUtility)
+class Thesaurus(ProtectedObject, Persistent, Contained):
+    """Thesaurus persistent class"""
+
+    __roles__ = ('thesaurus.Admin', 'thesaurus.Manager')
+    roles_interface = IThesaurusRoles
+
+    name = FieldProperty(IThesaurus['name'])
+    title = FieldProperty(IThesaurus['title'])
+    subject = FieldProperty(IThesaurus['subject'])
+    description = FieldProperty(IThesaurus['description'])
+    language = FieldProperty(IThesaurus['language'])
+    creator = FieldProperty(IThesaurus['creator'])
+    publisher = FieldProperty(IThesaurus['publisher'])
+    created = FieldProperty(IThesaurus['created'])
+
+    terms = None
+    _top_terms = FieldProperty(IThesaurus['top_terms'])
+    catalog = FieldProperty(IThesaurus['catalog'])
+
+    administrators = RolePrincipalsFieldProperty(IThesaurus['administrators'])
+    managers = RolePrincipalsFieldProperty(IThesaurus['managers'])
+
+    def __init__(self, name=None, description=None, terms=None):
+        if name:
+            self.name = name
+        if description:
+            self.title = description.title
+            self.subject = description.subject
+            self.description = description.description
+            self.language = description.language
+            self.creator = description.creator
+            self.publisher = description.publisher
+            self.created = description.created
+        if not IThesaurusTermsContainer.providedBy(terms):
+            terms = ThesaurusTermsContainer()
+        self.terms = terms
+        locate(terms, self, '++terms++')
+        self.reset_terms_parent()
+        self.reset_top_terms()
+
+    @property
+    def sublocations(self):
+        return self.terms, self.catalog
+
+    @property
+    def top_terms(self):
+        return self._top_terms
+
+    @top_terms.setter
+    def top_terms(self, value):
+        self._top_terms = [term for term in value or () if term.usage is None]
+
+    def init_catalog(self):
+        catalog = self.catalog = Catalog()
+        # Init fulltext search catalog
+        catalog['fulltext'] = TextIndexWithInterface(IThesaurusTerm, 'label', language=self.language)
+        # Init stemmed search catalog
+        catalog['stemmed'] = TextIndexWithInterface(IThesaurusTerm, 'label',
+                                                    lexicon=Lexicon(NltkStemmedTextProcessor(language=self.language)))
+        # Init value search catalog
+        catalog['value'] = FieldIndexWithInterface(IThesaurusTerm, 'base_label')
+        for idx, term in enumerate(self.terms.values()):
+            index_object(term, catalog)
+            if not idx % 100:
+                try:
+                    ITransactionManager(catalog).savepoint()
+                except TypeError:
+                    # Can fail if thesaurus is not stored yet...
+                    pass
+
+    def load(self, configuration):
+        loader = query_utility(IThesaurusLoader, name=configuration.format)
+        if loader is not None:
+            result = loader.load(configuration.data)
+            self.merge(configuration, result)
+
+    def merge(self, configuration, thesaurus=None):
+        if thesaurus is None:
+            loader = query_utility(IThesaurusLoader, name=configuration.format)
+            if loader is not None:
+                thesaurus = loader.load(configuration.data)
+        if thesaurus is not None:
+            # define or merge items from given thesaurus
+            terms = self.terms
+            for idx, (key, term) in enumerate(thesaurus.terms.items()):
+                # check for term conflict
+                if configuration.conflict_suffix:
+                    suffixed_key = key + ' ' + configuration.conflict_suffix
+                    if suffixed_key in terms:
+                        key = suffixed_key
+                    elif key in terms:
+                        term.label = key
+                if key in terms:
+                    terms[key].merge(term, configuration)
+                elif not IThesaurusLoaderTerm.providedBy(term):
+                    terms[key] = term
+                if not idx % 100:
+                    try:
+                        ITransactionManager(self).savepoint()
+                    except TypeError:
+                        # Can fail if thesaurus is not stored yet...
+                        pass
+        self.reset_terms_parent()
+        self.reset_top_terms()
+
+    def reset_terms_parent(self):
+        for idx, term in enumerate(self.terms.values()):
+            # reset generic/specifics attributes
+            generic = term.generic
+            if (generic is not None) and (term not in generic.specifics):
+                generic.specifics = generic.specifics + [term, ]
+            # reset term's first level parent
+            parent = term
+            while parent.generic is not None:
+                parent = parent.generic
+            term.parent = parent
+            if not idx % 100:
+                try:
+                    ITransactionManager(self).savepoint()
+                except TypeError:
+                    # Can fail if thesaurus is not stored yet...
+                    pass
+
+    def reset_top_terms(self):
+        self.top_terms = [term for term in self.terms.values()
+                          if (not term.generic) and (not term.usage)]
+
+    def clear(self):
+        self.terms.clear()
+        self.catalog.reset()
+        self.top_terms = []
+
+    def find_terms(self, query=None, extract=None, glob='end', limit=None,
+                   exact=False, exact_only=False, stemmed=False):
+        assert exact or (not exact_only)
+        terms = []
+        if exact:
+            query_text = translate_string(query, escape_slashes=True, force_lower=True, spaces=' ')
+            terms = list(CatalogResultSet(Eq(self.catalog['value'], query_text)))
+        if not exact_only:
+            search = None
+            # check stemmed index
+            if stemmed and not re.search(r'\*', query):
+                search = Contains(self.catalog['stemmed'],
+                                  ' and '.join(m for m in query.split() if len(m) > 2))
+            # check basic index
+            start = ''
+            end = ''
+            if CUSTOM_SEARCH.search(query):
+                query_text = query
+            else:
+                if glob in ('start', 'both'):
+                    # Starting glob is not supported!!
+                    start = ''
+                if glob in ('end', 'both'):
+                    end = '*'
+                query_text = ' and '.join(('{start}{mid}{end}'.format(start=start, mid=m, end=end)
+                                           for m in query.split() if len(m) > 2))
+            search = or_(search, Contains(self.catalog['fulltext'], query_text))
+            try:
+                terms += sorted(CatalogResultSet(search.execute()),
+                                key=lambda x: x.label)
+            except QueryError:
+                pass
+        if extract:
+            terms = filter(lambda term: extract in term.extracts, terms)
+        if limit:
+            terms = terms[:limit]
+        return terms
+
+    def delete(self):
+        pass
+
+
+@subscriber(IObjectAddedEvent, context_selector=IThesaurus)
+def handle_added_thesaurus(event):
+    """Handle added thesaurus to init inner catalog"""
+    manager = event.newParent
+    manager.registerUtility(event.object, IThesaurus, name=event.object.name)
+    event.object.init_catalog()
+
+
+@subscriber(IObjectRemovedEvent, context_selector=IThesaurus)
+def handle_removed_thesaurus(event):
+    """Handle removed thesaurus"""
+    manager = event.oldParent
+    manager.unregisterUtility(event.object, IThesaurus, name=event.object.name)
+
+
+@adapter_config(context=IThesaurus, provides=ITree)
+class ThesaurusTreeAdapter(ContextAdapter):
+    """Thesaurus tree adapter"""
+
+    def get_root_nodes(self):
+        return self.context.top_terms
+
+
+#
+# Thesaurus extracts
+#
+
+# Annotations key under which the extracts container of a thesaurus is stored
+THESAURUS_EXTRACTS_KEY = 'pyams_thesaurus.extracts'
+
+
+@implementer(IThesaurusExtracts)
+class ThesaurusExtractsContainer(BTreeContainer):
+    """Thesaurus extracts container
+
+    BTree-based container implementing IThesaurusExtracts; it adds nothing
+    to the behavior of its base container class.
+    """
+
+
+@adapter_config(context=IThesaurus, provides=IThesaurusExtracts)
+def ThesaurusExtractsFactory(context):
+    """Thesaurus extracts factory"""
+    annotations = IAnnotations(context)
+    extracts = annotations.get(THESAURUS_EXTRACTS_KEY)
+    if extracts is None:
+        extracts = annotations[THESAURUS_EXTRACTS_KEY] = ThesaurusExtractsContainer()
+        get_current_registry().notify(ObjectCreatedEvent(extracts))
+        locate(extracts, context, '++extracts++')
+    return extracts
+
+
+@adapter_config(name='extracts', context=IThesaurus, provides=ITraversable)
+class ThesaurusExtractsNamespace(ContextAdapter):
+    """Thesaurus ++extracts++ namespace"""
+
+    def traverse(self, name, furtherpath=None):
+        extracts = IThesaurusExtracts(self.context)
+        if name:
+            return extracts[name]
+        else:
+            return extracts
+
+
+@implementer(IThesaurusExtract)
+class ThesaurusExtract(Persistent, Contained):
+    """Thesaurus extract
+
+    Persistent object described by a name, a description, an abbreviation
+    and a color, to which thesaurus terms can be attached.
+    """
+
+    # NOTE(review): roles are declared here as on Thesaurus, but this class
+    # doesn't inherit from ProtectedObject - confirm this is intended
+    __roles__ = ('thesaurus.ExtractManager', )
+    roles_interface = IThesaurusExtractRoles
+
+    name = FieldProperty(IThesaurusExtract['name'])
+    description = FieldProperty(IThesaurusExtract['description'])
+    abbreviation = FieldProperty(IThesaurusExtract['abbreviation'])
+    color = FieldProperty(IThesaurusExtract['color'])
+
+    managers = RolePrincipalsFieldProperty(IThesaurusExtract['managers'])
+
+    def add_term(self, term):
+        """Attach the given term to this extract, by calling its ``add_extract`` method"""
+        term.add_extract(self)
+
+    def remove_term(self, term):
+        """Detach the given term from this extract, by calling its ``remove_extract`` method"""
+        term.remove_extract(self)
+
+
+@subscriber(IObjectRemovedEvent, context_selector=IThesaurusExtract)
+def handle_removed_extract(event):
+    """Handle removed extract"""
+    thesaurus = get_parent(event.object, IThesaurus)
+    name = event.object.name
+    for term in thesaurus.terms.values():
+        term.remove_extract(name, check=False)
+
+
+#
+# Vocabularies
+#
+
+@provider(IVocabularyFactory)
+class ThesaurusVocabulary(UtilityVocabulary):
+    """Thesaurus utilities vocabulary
+
+    Vocabulary built on utilities registered for the IThesaurus interface.
+    """
+
+    interface = IThesaurus
+    # presumably makes term values the utilities themselves instead of their
+    # names - confirm against UtilityVocabulary documentation
+    nameOnly = False
+
+# register the vocabulary factory under the 'PyAMS thesaurus' name
+getVocabularyRegistry().register('PyAMS thesaurus', ThesaurusVocabulary)
+
+
+@provider(IVocabularyFactory)
+class ThesaurusNamesVocabulary(UtilityVocabulary):
+    """Thesaurus names utilities vocabulary
+
+    Vocabulary built on utilities registered for the IThesaurus interface.
+    """
+
+    interface = IThesaurus
+    # presumably makes term values the utilities names instead of the
+    # utilities themselves - confirm against UtilityVocabulary documentation
+    nameOnly = True
+
+# register the vocabulary factory under the 'PyAMS thesaurus names' name
+getVocabularyRegistry().register('PyAMS thesaurus names', ThesaurusNamesVocabulary)
+
+
+@provider(IVocabularyFactory)
+class ThesaurusExtractsVocabulary(SimpleVocabulary):
+    """Thesaurus extracts vocabulary"""
+
+    def __init__(self, context=None):
+        terms = []
+        if context is not None:
+            thesaurus = get_parent(context, IThesaurus)
+            if thesaurus is not None:
+                extracts = IThesaurusExtracts(thesaurus)
+                terms = [SimpleTerm(extract.__name__, title=extract.name) for extract in extracts.values()]
+                terms.sort(key=lambda x: x.title)
+        super(ThesaurusExtractsVocabulary, self).__init__(terms)
+
+getVocabularyRegistry().register('PyAMS thesaurus extracts', ThesaurusExtractsVocabulary)
+
+
+@provider(IVocabularyFactory)
+class ThesaurusTermExtensionsVocabulary(SimpleVocabulary):
+    """Thesaurus term extensions vocabulary"""
+
+    interface = IThesaurusTermExtension
+
+    def __init__(self, context=None):
+        translate = check_request().localizer.translate
+        try:
+            terms = [SimpleTerm(name, title=translate(util.label))
+                     for name, util in get_utilities_for(self.interface)]
+        except ComponentLookupError:
+            terms = []
+        super(ThesaurusTermExtensionsVocabulary, self).__init__(terms)
+
+getVocabularyRegistry().register('PyAMS thesaurus extensions', ThesaurusTermExtensionsVocabulary)