ztfy/utils/catalog/index.py
changeset 42 cb2a0e2d3bbf
child 70 82d8de021806
equal deleted inserted replaced
41:da1558b96f0a 42:cb2a0e2d3bbf
       
     1 ### -*- coding: utf-8 -*- ####################################################
       
     2 ##############################################################################
       
     3 #
       
     4 # Copyright (c) 2008-2010 Thierry Florac <tflorac AT ulthar.net>
       
     5 # All Rights Reserved.
       
     6 #
       
     7 # This software is subject to the provisions of the Zope Public License,
       
     8 # Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
       
     9 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
       
    10 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    11 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
       
    12 # FOR A PARTICULAR PURPOSE.
       
    13 #
       
    14 ##############################################################################
       
    15 
       
    16 __docformat__ = "restructuredtext"
       
    17 
       
    18 # import standard packages
       
    19 import re
       
    20 from persistent import Persistent
       
    21 from BTrees import IFBTree
       
    22 
       
    23 # import Zope3 interfaces
       
    24 from zope.index.interfaces import IInjection, IStatistics, IIndexSearch
       
    25 from zopyx.txng3.core.interfaces import IStorageWithTermFrequency
       
    26 from zopyx.txng3.core.interfaces.ting import ITingIndex
       
    27 
       
    28 # import local interfaces
       
    29 
       
    30 # import Zope3 packages
       
    31 from zope.app import zapi
       
    32 from zope.app.catalog.attribute import AttributeIndex
       
    33 from zope.app.container.contained import Contained
       
    34 from zope.interface import implements
       
    35 from zopyx.txng3.core import config
       
    36 from zopyx.txng3.core.index import Index
       
    37 
       
    38 # import local packages
       
    39 from hurry.query.query import IndexTerm
       
    40 
       
    41 
       
    42 class TextIndexNG(AttributeIndex, Persistent, Contained):
       
    43     """Adaptation of zopyx.txng3.core for use zope.app.catalog index"""
       
    44 
       
    45     implements(IInjection, IStatistics, IIndexSearch, ITingIndex)
       
    46 
       
    47     def __init__(self,
       
    48                  field_name=None,
       
    49                  interface=None,
       
    50                  field_callable=False,
       
    51                  use_stemmer=config.defaults['use_stemmer'],
       
    52                  dedicated_storage=config.defaults['dedicated_storage'],
       
    53                  ranking=config.defaults['ranking'],
       
    54                  use_normalizer=config.defaults['use_normalizer'],
       
    55                  languages=config.DEFAULT_LANGUAGE,
       
    56                  use_stopwords=config.defaults['use_stopwords'],
       
    57                  autoexpand_limit=config.defaults['autoexpand_limit'],
       
    58                  splitter=config.DEFAULT_SPLITTER,
       
    59                  index_unknown_languages=config.defaults['index_unknown_languages'],
       
    60                  query_parser=config.DEFAULT_PARSER,
       
    61                  lexicon=config.DEFAULT_LEXICON,
       
    62                  splitter_additional_chars=config.defaults['splitter_additional_chars'],
       
    63                  storage=config.DEFAULT_STORAGE,
       
    64                  splitter_casefolding=config.defaults['splitter_casefolding']):
       
    65         spaces = re.compile(r'\s+')
       
    66         if ranking:
       
    67             util = zapi.createObject(storage)
       
    68             if not IStorageWithTermFrequency.providedBy(util):
       
    69                 raise ValueError("This storage cannot be used for ranking")
       
    70         _fields = spaces.split(field_name)
       
    71         AttributeIndex.__init__(self, _fields[0], interface, field_callable)
       
    72         if len(_fields) < 2:
       
    73             dedicated_storage = False
       
    74         self._index = Index(fields=_fields,
       
    75                             languages=spaces.split(languages),
       
    76                             use_stemmer=use_stemmer,
       
    77                             dedicated_storage=dedicated_storage,
       
    78                             ranking=ranking,
       
    79                             use_normalizer=use_normalizer,
       
    80                             use_stopwords=use_stopwords,
       
    81                             storage=storage,
       
    82                             autoexpand_limit=autoexpand_limit,
       
    83                             splitter=splitter,
       
    84                             lexicon=lexicon,
       
    85                             index_unknown_languages=index_unknown_languages,
       
    86                             query_parser=query_parser,
       
    87                             splitter_additional_chars=splitter_additional_chars,
       
    88                             splitter_casefolding=splitter_casefolding)
       
    89         self.languages = languages
       
    90         self.use_stemmer = use_stemmer
       
    91         self.dedicated_storage = dedicated_storage
       
    92         self.ranking = ranking
       
    93         self.use_normalizer = use_normalizer
       
    94         self.use_stopwords = use_stopwords
       
    95         self.interface = interface
       
    96         self.storage = storage
       
    97         self.autoexpand_limit = autoexpand_limit
       
    98         self.default_field = _fields[0]
       
    99         self._fields = _fields
       
   100         self.splitter = splitter
       
   101         self.lexicon = lexicon
       
   102         self.index_unknown_languages = index_unknown_languages
       
   103         self.query_parser = query_parser
       
   104         self.splitter_additional_chars = splitter_additional_chars
       
   105         self.splitter_casefolding = splitter_casefolding
       
   106 
       
   107     def clear(self):
       
   108         self._index.clear()
       
   109 
       
   110     def documentCount(self):
       
   111         """See interface IStatistics"""
       
   112         return len(self._index.getStorage(self.default_field))
       
   113 
       
   114     def wordCount(self):
       
   115         """See interface IStatistics"""
       
   116         return len(self._index.getLexicon())
       
   117 
       
   118     def index_doc(self, docid, value):
       
   119         """See interface IInjection"""
       
   120         v = self.interface(value, None)
       
   121         if v is not None:
       
   122             self.unindex_doc(docid)
       
   123             self._index.index_object(v, docid)
       
   124 
       
   125     def unindex_doc(self, docid):
       
   126         """See interface IInjection"""
       
   127         self._index.unindex_object(docid)
       
   128 
       
   129     def apply(self, query):
       
   130         if isinstance(query, dict):
       
   131             kw = query
       
   132             query = kw['query']
       
   133             del kw['query']
       
   134         ting_rr = self._index.search(query, **kw)
       
   135         return ting_rr.getDocids().keys()
       
   136 
       
   137 
       
   138 class Text(IndexTerm):
       
   139     """hurry.query search term"""
       
   140 
       
   141     def __init__(self, index_id, text):
       
   142         super(Text, self).__init__(index_id)
       
   143         self.text = text
       
   144 
       
   145     def getIndex(self):
       
   146         index = super(Text, self).getIndex()
       
   147         assert ITingIndex.providedBy(index)
       
   148         return index
       
   149 
       
   150     def apply(self):
       
   151         index = self.getIndex()
       
   152         return IFBTree.IFSet(index.apply(self.text))