src/pyams_utils/doctests/unicode.txt
changeset 139 6daed68877b3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/pyams_utils/doctests/unicode.txt	Sun Feb 18 17:34:42 2018 +0100
@@ -0,0 +1,80 @@
+
+Unicode functions
+-----------------
+
+When working with extended character sets containing accented characters, it's necessary to
+convert strings to UTF-8 so that they can be used without any conversion problem.
+
+    >>> from pyams_utils import unicode
+
+'translate_string' is a utility function which can be used, for example, to generate an object's id
+without spaces and with accented characters converted to their unaccented equivalents:
+
+    >>> sample = 'Mon titre accentué'
+    >>> unicode.translate_string(sample)
+    'mon titre accentue'
+
+Results are lower-cased by default; this can be avoided by setting the 'force_lower' argument
+to False:
+
+    >>> unicode.translate_string(sample, force_lower=False)
+    'Mon titre accentue'
+    >>> unicode.translate_string(sample, force_lower=True, spaces='-')
+    'mon-titre-accentue'
+
+    >>> sample = 'Texte accentué avec "ponctuation" !'
+    >>> unicode.translate_string(sample, force_lower=True, spaces=' ')
+    'texte accentue avec ponctuation'
+    >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=False, spaces=' ')
+    'texte accentue avec "ponctuation" !'
+    >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=False, spaces='-')
+    'texte-accentue-avec-"ponctuation"-!'
+    >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=True, spaces='-')
+    'texte-accentue-avec-ponctuation'
+    >>> unicode.translate_string(sample, force_lower=True, remove_punctuation=True, spaces=' ', keep_chars='!')
+    'texte accentue avec ponctuation !'
+
+
+If the input string contains slashes (/) or backslashes (\), they are normally removed;
+by using the 'escape_slashes' parameter, the input string is split and only the last element is
+returned; this is handy for handling file names on the Windows platform:
+
+    >>> sample = 'Autre / chaîne / accentuée'
+    >>> unicode.translate_string(sample)
+    'autre chaine accentuee'
+    >>> unicode.translate_string(sample, escape_slashes=True)
+    'accentuee'
+    >>> sample = 'C:\\Program Files\\My Application\\test.txt'
+    >>> unicode.translate_string(sample)
+    'cprogram filesmy applicationtest.txt'
+    >>> unicode.translate_string(sample, escape_slashes=True)
+    'test.txt'
+
+To remove remaining spaces or convert them to another character, you can use the "spaces" parameter,
+which can contain any string to be used in place of the original spaces:
+
+    >>> sample = 'C:\\Program Files\\My Application\\test.txt'
+    >>> unicode.translate_string(sample, spaces=' ')
+    'cprogram filesmy applicationtest.txt'
+    >>> unicode.translate_string(sample, spaces='-')
+    'cprogram-filesmy-applicationtest.txt'
+
+Space replacement is performed in the last step, so using it with the "escape_slashes" parameter only
+affects the final result:
+
+    >>> unicode.translate_string(sample, escape_slashes=True, spaces='-')
+    'test.txt'
+
+The unicode module also provides encoding and decoding functions:
+
+    >>> var = b'Cha\xeene accentu\xe9e'
+    >>> unicode.decode(var, 'latin1')
+    'Chaîne accentuée'
+    >>> unicode.encode(unicode.decode(var, 'latin1'), 'latin1') == var
+    True
+
+    >>> utf = 'Chaîne accentuée'
+    >>> unicode.encode(utf, 'latin1')
+    b'Cha\xeene accentu\xe9e'
+    >>> unicode.decode(unicode.encode(utf, 'latin1'), 'latin1') == utf
+    True