--- a/src/pyams_utils/doctests/README.txt Tue Mar 17 16:05:03 2015 +0100
+++ b/src/pyams_utils/doctests/README.txt Fri Mar 20 17:13:14 2015 +0100
@@ -32,6 +32,19 @@
>>> unicode.translate_string(sample, force_lower=True, spaces='-')
'mon-titre-accentue'
+ >>> sample = 'Texte accentué avec "ponctuation" !'
+ >>> unicode.translate_string(sample, force_lower=True, spaces=' ')
+ 'texte accentue avec ponctuation'
+ >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=False, spaces=' ')
+ 'texte accentue avec "ponctuation" !'
+ >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=False, spaces='-')
+ 'texte-accentue-avec-"ponctuation"-!'
+ >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=True, spaces='-')
+ 'texte-accentue-avec-ponctuation'
+ >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=True, spaces=' ', keep_chars='!')
+ 'texte accentue avec ponctuation !'
+
+
If the input string contains slashes (/) or backslashes (\), they are normally removed;
by using the 'escape_slashes' parameter, the input string is split and only the last element is
returned; this is handy for handling filenames on the Windows platform:
--- a/src/pyams_utils/unicode.py Tue Mar 17 16:05:03 2015 +0100
+++ b/src/pyams_utils/unicode.py Fri Mar 20 17:13:14 2015 +0100
@@ -77,7 +77,8 @@
_fillUnicodeTransTable()
-def translate_string(s, escape_slashes=False, force_lower=True, spaces=' ', keep_chars='_-.'):
+def translate_string(s, escape_slashes=False, force_lower=True,
+ spaces=' ', remove_punctuation=True, keep_chars='_-.'):
"""Remove extended characters from string and replace them with 'basic' ones
@param s: text to be cleaned.
@@ -95,10 +96,12 @@
if isinstance(s, bytes):
s = s.decode("utf-8", "replace")
s = s.translate(_unicodeTransTable)
- s = ''.join([a for a in s.translate(_unicodeTransTable)
- if a.replace(' ', '-') in (string.ascii_letters + string.digits + (keep_chars or ''))])
+ if remove_punctuation:
+ punctuation = ''.join(filter(lambda x: x not in keep_chars, string.punctuation))
+ s = ''.join(filter(lambda x: x not in punctuation, s))
if force_lower:
s = s.lower()
+ s = s.strip()
if spaces != ' ':
s = s.replace(' ', spaces)
return s