equal
deleted
inserted
replaced
18 # import interfaces |
18 # import interfaces |
19 from hypatia.text.interfaces import IPipelineElement |
19 from hypatia.text.interfaces import IPipelineElement |
20 |
20 |
21 # import packages |
21 # import packages |
22 import nltk |
22 import nltk |
|
23 from pyams_i18n.language import BASE_LANGUAGES |
23 from pyams_utils.unicode import translate_string |
24 from pyams_utils.unicode import translate_string |
24 from zope.interface import implementer |
25 from zope.interface import implementer |
25 |
26 |
26 |
27 |
27 @implementer(IPipelineElement) |
28 @implementer(IPipelineElement) |
28 class NltkStemmedTextProcessor(object): |
29 class NltkStemmedTextProcessor(object): |
29 """NLTK based text processor using stemmer""" |
30 """NLTK based text processor using stemmer""" |
30 |
31 |
31 def __init__(self, language='english'): |
32 def __init__(self, language='english'): |
|
33 if language in BASE_LANGUAGES: |
|
34 language = BASE_LANGUAGES[language].lower() |
32 self.language = language |
35 self.language = language |
33 self.stemmer = nltk.stem.SnowballStemmer(language, ignore_stopwords=True) |
36 self.stemmer = nltk.stem.SnowballStemmer(language, ignore_stopwords=True) |
34 |
37 |
35 def process(self, lst): |
38 def process(self, lst): |
36 result = [] |
39 result = [] |
56 @implementer(IPipelineElement) |
59 @implementer(IPipelineElement) |
57 class NltkFullTextProcessor(object): |
60 class NltkFullTextProcessor(object): |
58 """NLTK based full text processor""" |
61 """NLTK based full text processor""" |
59 |
62 |
60 def __init__(self, language='english'): |
63 def __init__(self, language='english'): |
|
64 if language in BASE_LANGUAGES: |
|
65 language = BASE_LANGUAGES[language].lower() |
61 self.language = language |
66 self.language = language |
62 |
67 |
63 def process(self, lst): |
68 def process(self, lst): |
64 result = [] |
69 result = [] |
65 for s in lst: |
70 for s in lst: |