--- a/src/pyams_utils/unicode.py Thu Feb 14 17:37:09 2019 +0100
+++ b/src/pyams_utils/unicode.py Mon Feb 18 17:12:39 2019 +0100
@@ -12,15 +12,9 @@
__docformat__ = 'restructuredtext'
-
-# import standard library
import codecs
import string
-# import interfaces
-
-# import packages
-
_unicodeTransTable = {}
def _fillUnicodeTransTable():
@@ -78,6 +72,10 @@
_fillUnicodeTransTable()
+removed_chars = '®©™'
+"""Custom characters removed along with punctuation (unless listed in ``keep_chars``)"""
+
+
def translate_string(s, escape_slashes=False, force_lower=True,
spaces=' ', remove_punctuation=True, keep_chars='_-.'):
"""Remove extended characters and diacritics from string and replace them with 'basic' ones
@@ -110,7 +108,7 @@
s = s.decode("utf-8", "replace")
s = s.translate(_unicodeTransTable)
if remove_punctuation:
- punctuation = ''.join(filter(lambda x: x not in keep_chars, string.punctuation))
+ punctuation = ''.join(filter(lambda x: x not in keep_chars, string.punctuation + removed_chars))
s = ''.join(filter(lambda x: x not in punctuation, s))
if force_lower:
s = s.lower()