|
1 ### -*- coding: utf-8 -*- #################################################### |
|
2 ############################################################################## |
|
3 # |
|
4 # Copyright (c) 2008-2010 Thierry Florac <tflorac AT ulthar.net> |
|
5 # All Rights Reserved. |
|
6 # |
|
7 # This software is subject to the provisions of the Zope Public License, |
|
8 # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. |
|
9 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED |
|
10 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
11 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS |
|
12 # FOR A PARTICULAR PURPOSE. |
|
13 # |
|
14 ############################################################################## |
|
15 |
|
16 __docformat__ = "restructuredtext" |
|
17 |
|
18 # import standard packages |
|
19 import re |
|
20 from persistent import Persistent |
|
21 from BTrees import IFBTree |
|
22 |
|
23 # import Zope3 interfaces |
|
24 from zope.index.interfaces import IInjection, IStatistics, IIndexSearch |
|
25 from zopyx.txng3.core.interfaces import IStorageWithTermFrequency |
|
26 from zopyx.txng3.core.interfaces.ting import ITingIndex |
|
27 |
|
28 # import local interfaces |
|
29 |
|
30 # import Zope3 packages |
|
31 from zope.app import zapi |
|
32 from zope.app.catalog.attribute import AttributeIndex |
|
33 from zope.app.container.contained import Contained |
|
34 from zope.interface import implements |
|
35 from zopyx.txng3.core import config |
|
36 from zopyx.txng3.core.index import Index |
|
37 |
|
38 # import local packages |
|
39 from hurry.query.query import IndexTerm |
|
40 |
|
41 |
|
class TextIndexNG(AttributeIndex, Persistent, Contained):
    """Adaptation of zopyx.txng3.core for use as a zope.app.catalog index.

    The catalog-facing methods (``index_doc``, ``unindex_doc``, ``apply``,
    ``documentCount``, ``wordCount``, ``clear``) all delegate to a wrapped
    ``zopyx.txng3.core.index.Index`` instance stored in ``self._index``.
    """

    implements(IInjection, IStatistics, IIndexSearch, ITingIndex)

    def __init__(self,
                 field_name=None,
                 interface=None,
                 field_callable=False,
                 use_stemmer=config.defaults['use_stemmer'],
                 dedicated_storage=config.defaults['dedicated_storage'],
                 ranking=config.defaults['ranking'],
                 use_normalizer=config.defaults['use_normalizer'],
                 languages=config.DEFAULT_LANGUAGE,
                 use_stopwords=config.defaults['use_stopwords'],
                 autoexpand_limit=config.defaults['autoexpand_limit'],
                 splitter=config.DEFAULT_SPLITTER,
                 index_unknown_languages=config.defaults['index_unknown_languages'],
                 query_parser=config.DEFAULT_PARSER,
                 lexicon=config.DEFAULT_LEXICON,
                 splitter_additional_chars=config.defaults['splitter_additional_chars'],
                 storage=config.DEFAULT_STORAGE,
                 splitter_casefolding=config.defaults['splitter_casefolding']):
        """Create the index and its underlying txng3 ``Index``.

        ``field_name`` is a whitespace-separated list of field names; the
        first one becomes the default field and is the one handed to
        ``AttributeIndex``.  ``languages`` is a whitespace-separated list
        too.  ``interface``, when given, is called as an adapter on every
        object passed to ``index_doc``.  All remaining arguments are
        forwarded unchanged to ``Index`` and also stored on the instance.

        Raises ``ValueError`` when ``ranking`` is requested with a storage
        that does not provide ``IStorageWithTermFrequency``, and
        ``TypeError`` when ``field_name`` is not given.
        """
        spaces = re.compile(r'\s+')
        if ranking:
            # Instantiate the storage only to check that it is usable for
            # ranking; the instance itself is not kept.
            util = zapi.createObject(storage)
            if not IStorageWithTermFrequency.providedBy(util):
                raise ValueError("This storage cannot be used for ranking")
        if field_name is None:
            # Splitting None would fail with an opaque regex TypeError;
            # keep the exception type but say what is actually wrong.
            raise TypeError("field_name is required (whitespace-separated "
                            "list of field names)")
        # Strip first: leading/trailing whitespace would otherwise produce
        # empty-string entries (and possibly an empty default field).
        _fields = spaces.split(field_name.strip())
        AttributeIndex.__init__(self, _fields[0], interface, field_callable)
        if len(_fields) < 2:
            # A dedicated storage is only enabled with two or more fields.
            dedicated_storage = False
        self._index = Index(fields=_fields,
                            languages=spaces.split(languages.strip()),
                            use_stemmer=use_stemmer,
                            dedicated_storage=dedicated_storage,
                            ranking=ranking,
                            use_normalizer=use_normalizer,
                            use_stopwords=use_stopwords,
                            storage=storage,
                            autoexpand_limit=autoexpand_limit,
                            splitter=splitter,
                            lexicon=lexicon,
                            index_unknown_languages=index_unknown_languages,
                            query_parser=query_parser,
                            splitter_additional_chars=splitter_additional_chars,
                            splitter_casefolding=splitter_casefolding)
        # Keep the configuration on the instance as well.
        self.languages = languages
        self.use_stemmer = use_stemmer
        self.dedicated_storage = dedicated_storage
        self.ranking = ranking
        self.use_normalizer = use_normalizer
        self.use_stopwords = use_stopwords
        self.interface = interface
        self.storage = storage
        self.autoexpand_limit = autoexpand_limit
        self.default_field = _fields[0]
        self._fields = _fields
        self.splitter = splitter
        self.lexicon = lexicon
        self.index_unknown_languages = index_unknown_languages
        self.query_parser = query_parser
        self.splitter_additional_chars = splitter_additional_chars
        self.splitter_casefolding = splitter_casefolding

    def clear(self):
        """Clear the underlying txng3 index."""
        self._index.clear()

    def documentCount(self):
        """See interface IStatistics

        Returns the size of the storage of the default field.
        """
        return len(self._index.getStorage(self.default_field))

    def wordCount(self):
        """See interface IStatistics

        Returns the size of the index lexicon.
        """
        return len(self._index.getLexicon())

    def index_doc(self, docid, value):
        """See interface IInjection

        ``value`` is adapted through ``self.interface`` when one is
        configured; objects that cannot be adapted are silently skipped.
        Without a configured interface the object is indexed as is.
        """
        if self.interface is not None:
            v = self.interface(value, None)
        else:
            # No adapter configured (constructor default): calling None
            # would raise TypeError, so hand the object over unchanged.
            v = value
        if v is not None:
            # Drop any previous entry for this docid before indexing again.
            self.unindex_doc(docid)
            self._index.index_object(v, docid)

    def unindex_doc(self, docid):
        """See interface IInjection"""
        self._index.unindex_object(docid)

    def apply(self, query):
        """See interface IIndexSearch

        ``query`` is either the query text itself or a dict holding the
        text under the ``'query'`` key; every other dict entry is passed
        to the underlying ``search`` call as a keyword argument.  Returns
        the keys of the docids of the search result.

        Raises ``KeyError`` when a dict query has no ``'query'`` entry.
        """
        if isinstance(query, dict):
            # Work on a copy so the caller's dict is left untouched.
            kw = dict(query)
            query = kw.pop('query')
        else:
            # Plain text query: no extra search options.
            kw = {}
        ting_rr = self._index.search(query, **kw)
        return ting_rr.getDocids().keys()
|
136 |
|
137 |
|
class Text(IndexTerm):
    """hurry.query search term for an index providing ``ITingIndex``."""

    def __init__(self, index_id, text):
        """Remember the index identifier and the text to search for."""
        super(Text, self).__init__(index_id)
        self.text = text

    def getIndex(self):
        """Return the index resolved by the base term.

        The resolved index is asserted to provide ``ITingIndex``.
        """
        ting_index = super(Text, self).getIndex()
        assert ITingIndex.providedBy(ting_index)
        return ting_index

    def apply(self):
        """Run the text query and return the matches as an ``IFSet``."""
        matches = self.getIndex().apply(self.text)
        return IFBTree.IFSet(matches)