src/pyams_utils/unicode.py
branchdev-tf
changeset 408 cf2304af0fab
parent 391 49d63e4bf171
--- a/src/pyams_utils/unicode.py	Wed Nov 20 19:26:23 2019 +0100
+++ b/src/pyams_utils/unicode.py	Fri Nov 22 18:51:37 2019 +0100
@@ -10,16 +10,22 @@
 # FOR A PARTICULAR PURPOSE.
 #
 
-__docformat__ = 'restructuredtext'
+"""PyAMS_utils.unicode module
+
+This module provides a small set of functions which can be used to handle unicode strings and
+their bytes equivalents.
+"""
 
 import codecs
 import string
 
-
-_unicodeTransTable = {}
+__docformat__ = 'restructuredtext'
 
 
-def _fillUnicodeTransTable():
+_UNICODE_TRANS_TABLE = {}
+
+
+def _fill_unicode_trans_table():
     _corresp = [
         ("A", [0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x0100, 0x0102, 0x0104]),
         ("AE", [0x00C6]),
@@ -69,20 +75,22 @@
     ]
     for char, codes in _corresp:
         for code in codes:
-            _unicodeTransTable[code] = char
+            _UNICODE_TRANS_TABLE[code] = char
 
 
-_fillUnicodeTransTable()
+_fill_unicode_trans_table()
 
-removed_chars = '®©™…'
+
+_REMOVED_CHARS = '®©™…'
 """List of custom characters to remove from input strings"""
 
 
-def translate_string(s, escape_slashes=False, force_lower=True,
+def translate_string(value, escape_slashes=False, force_lower=True,
                      spaces=' ', remove_punctuation=True, keep_chars='_-.'):
+    # pylint: disable=too-many-arguments
     """Remove extended characters and diacritics from string and replace them with 'basic' ones
-    
-    :param str s: text to be cleaned.
+
+    :param str value: text to be translated
     :param boolean escape_slashes: if True, slashes are also converted
     :param boolean force_lower: if True, result is automatically converted to lower case
     :param str spaces: character used to replace spaces
@@ -91,39 +99,39 @@
     :return: text without diacritics or special characters
 
     >>> from pyams_utils.unicode import translate_string
-    >>> input = 'Ceci est un test en Français !!!'
-    >>> translate_string(input)
+    >>> input_string = 'Ceci est un test en Français !!!'
+    >>> translate_string(input_string)
     'ceci est un test en francais'
-    >>> translate_string(input, force_lower=False)
+    >>> translate_string(input_string, force_lower=False)
     'Ceci est un test en Francais'
-    >>> translate_string(input, spaces='-')
+    >>> translate_string(input_string, spaces='-')
     'ceci-est-un-test-en-francais'
-    >>> translate_string(input, remove_punctuation=False)
+    >>> translate_string(input_string, remove_punctuation=False)
     'ceci est un test en francais !!!'
-    >>> translate_string(input, keep_chars='!')
+    >>> translate_string(input_string, keep_chars='!')
     'ceci est un test en francais !!!'
     """
     if escape_slashes:
-        s = s.replace("\\", "/").split("/")[-1]
-    s = s.strip()
-    if isinstance(s, bytes):
-        s = s.decode("utf-8", "replace")
-    s = s.translate(_unicodeTransTable)
+        value = value.replace("\\", "/").split("/")[-1]
+    value = value.strip()
+    if isinstance(value, bytes):
+        value = value.decode("utf-8", "replace")
+    value = value.translate(_UNICODE_TRANS_TABLE)
     if remove_punctuation:
         punctuation = ''.join(filter(lambda x: x not in keep_chars,
-                                     string.punctuation + removed_chars))
-        s = ''.join(filter(lambda x: x not in punctuation, s))
+                                     string.punctuation + _REMOVED_CHARS))
+        value = ''.join(filter(lambda x: x not in punctuation, value))
     if force_lower:
-        s = s.lower()
-    s = s.strip()
+        value = value.lower()
+    value = value.strip()
     if spaces != ' ':
-        s = s.replace(' ', spaces)
-    return s
+        value = value.replace(' ', spaces)
+    return value
 
 
 def nvl(value, default=''):
     """Get specified value, or an empty string if value is empty
-    
+
     :param object value: value to be checked
     :param object default: default value to be returned if value is *false*
     :return: input value, or *default* if value is *false*
@@ -141,7 +149,7 @@
 
 def uninvl(value, default='', encoding='utf-8'):
     """Get specified value converted to unicode, or an empty unicode string if value is empty
-    
+
     :param str/bytes value: the input to be checked
     :param default: str; default value
     :param encoding: str; encoding name to use for conversion
@@ -161,13 +169,13 @@
         return value
     try:
         return codecs.decode(value or default, encoding)
-    except:
+    except ValueError:
         return codecs.decode(value or default, 'latin1')
 
 
 def unidict(value, encoding='utf-8'):
     """Get specified dict with values converted to unicode
-    
+
     :param dict value: input mapping of strings which may be converted to unicode
     :param str encoding: output encoding
     :return: dict; a new mapping with each value converted to unicode
@@ -186,7 +194,7 @@
 
 def unilist(value, encoding='utf-8'):
     """Get specified list with values converted to unicode
-    
+
     :param list value: input list of strings which may be converted to unicode
     :param str encoding: output encoding
     :return: list; a new list with each value converted to unicode