Added extra-characters to punctuation when translating string to generate URLs
author Thierry Florac <thierry.florac@onf.fr>
Mon, 18 Feb 2019 17:12:39 +0100
changeset 328 5f8deef8e5d2
parent 327 a0d48f90efca
child 329 1482a4b86075
Added extra-characters to punctuation when translating string to generate URLs
src/pyams_utils/unicode.py
--- a/src/pyams_utils/unicode.py	Thu Feb 14 17:37:09 2019 +0100
+++ b/src/pyams_utils/unicode.py	Mon Feb 18 17:12:39 2019 +0100
@@ -12,15 +12,9 @@
 
 __docformat__ = 'restructuredtext'
 
-
-# import standard library
 import codecs
 import string
 
-# import interfaces
-
-# import packages
-
 
 _unicodeTransTable = {}
 def _fillUnicodeTransTable():
@@ -78,6 +72,10 @@
 _fillUnicodeTransTable()
 
 
+removed_chars = '®©™'
+"""List of custom characters to remove from input strings"""
+
+
 def translate_string(s, escape_slashes=False, force_lower=True,
                      spaces=' ', remove_punctuation=True, keep_chars='_-.'):
     """Remove extended characters and diacritics from string and replace them with 'basic' ones
@@ -110,7 +108,7 @@
         s = s.decode("utf-8", "replace")
     s = s.translate(_unicodeTransTable)
     if remove_punctuation:
-        punctuation = ''.join(filter(lambda x: x not in keep_chars, string.punctuation))
+        punctuation = ''.join(filter(lambda x: x not in keep_chars, string.punctuation + removed_chars))
         s = ''.join(filter(lambda x: x not in punctuation, s))
     if force_lower:
         s = s.lower()