--- a/src/pyams_utils/unicode.py Wed Nov 20 19:26:23 2019 +0100
+++ b/src/pyams_utils/unicode.py Fri Nov 22 18:51:37 2019 +0100
@@ -10,16 +10,22 @@
# FOR A PARTICULAR PURPOSE.
#
-__docformat__ = 'restructuredtext'
+"""PyAMS_utils.unicode module
+
+This module provides a small set of functions which can be used to handle unicode strings and
+their bytes equivalents.
+"""
import codecs
import string
-
-_unicodeTransTable = {}
+__docformat__ = 'restructuredtext'
-def _fillUnicodeTransTable():
+_UNICODE_TRANS_TABLE = {}
+
+
+def _fill_unicode_trans_table():
_corresp = [
("A", [0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x0100, 0x0102, 0x0104]),
("AE", [0x00C6]),
@@ -69,20 +75,22 @@
]
for char, codes in _corresp:
for code in codes:
- _unicodeTransTable[code] = char
+ _UNICODE_TRANS_TABLE[code] = char
-_fillUnicodeTransTable()
+_fill_unicode_trans_table()
-removed_chars = '®©™…'
+
+_REMOVED_CHARS = '®©™…'
"""List of custom characters to remove from input strings"""
-def translate_string(s, escape_slashes=False, force_lower=True,
+def translate_string(value, escape_slashes=False, force_lower=True,
spaces=' ', remove_punctuation=True, keep_chars='_-.'):
+ # pylint: disable=too-many-arguments
"""Remove extended characters and diacritics from string and replace them with 'basic' ones
-
- :param str s: text to be cleaned.
+
+ :param str value: text to be translated
:param boolean escape_slashes: if True, slashes are also converted
:param boolean force_lower: if True, result is automatically converted to lower case
:param str spaces: character used to replace spaces
@@ -91,39 +99,39 @@
:return: text without diacritics or special characters
>>> from pyams_utils.unicode import translate_string
- >>> input = 'Ceci est un test en Français !!!'
- >>> translate_string(input)
+ >>> input_string = 'Ceci est un test en Français !!!'
+ >>> translate_string(input_string)
'ceci est un test en francais'
- >>> translate_string(input, force_lower=False)
+ >>> translate_string(input_string, force_lower=False)
'Ceci est un test en Francais'
- >>> translate_string(input, spaces='-')
+ >>> translate_string(input_string, spaces='-')
'ceci-est-un-test-en-francais'
- >>> translate_string(input, remove_punctuation=False)
+ >>> translate_string(input_string, remove_punctuation=False)
'ceci est un test en francais !!!'
- >>> translate_string(input, keep_chars='!')
+ >>> translate_string(input_string, keep_chars='!')
'ceci est un test en francais !!!'
"""
if escape_slashes:
- s = s.replace("\\", "/").split("/")[-1]
- s = s.strip()
- if isinstance(s, bytes):
- s = s.decode("utf-8", "replace")
- s = s.translate(_unicodeTransTable)
+ value = value.replace("\\", "/").split("/")[-1]
+ value = value.strip()
+ if isinstance(value, bytes):
+ value = value.decode("utf-8", "replace")
+ value = value.translate(_UNICODE_TRANS_TABLE)
if remove_punctuation:
punctuation = ''.join(filter(lambda x: x not in keep_chars,
- string.punctuation + removed_chars))
- s = ''.join(filter(lambda x: x not in punctuation, s))
+ string.punctuation + _REMOVED_CHARS))
+ value = ''.join(filter(lambda x: x not in punctuation, value))
if force_lower:
- s = s.lower()
- s = s.strip()
+ value = value.lower()
+ value = value.strip()
if spaces != ' ':
- s = s.replace(' ', spaces)
- return s
+ value = value.replace(' ', spaces)
+ return value
def nvl(value, default=''):
"""Get specified value, or an empty string if value is empty
-
+
:param object value: value to be checked
:param object default: default value to be returned if value is *false*
:return: input value, or *default* if value is *false*
@@ -141,7 +149,7 @@
def uninvl(value, default='', encoding='utf-8'):
"""Get specified value converted to unicode, or an empty unicode string if value is empty
-
+
:param str/bytes value: the input to be checked
:param default: str; default value
:param encoding: str; encoding name to use for conversion
@@ -161,13 +169,13 @@
return value
try:
return codecs.decode(value or default, encoding)
- except:
+ except ValueError:
return codecs.decode(value or default, 'latin1')
def unidict(value, encoding='utf-8'):
"""Get specified dict with values converted to unicode
-
+
:param dict value: input mapping of strings which may be converted to unicode
:param str encoding: output encoding
:return: dict; a new mapping with each value converted to unicode
@@ -186,7 +194,7 @@
def unilist(value, encoding='utf-8'):
"""Get specified list with values converted to unicode
-
+
:param list value: input list of strings which may be converted to unicode
:param str encoding: output encoding
:return: list; a new list with each value converted to unicode