|
1 ### -*- coding: utf-8 -*- #################################################### |
|
2 ############################################################################## |
|
3 # |
|
4 # Copyright (c) 2008 Thierry Florac <tflorac AT ulthar.net> |
|
5 # All Rights Reserved. |
|
6 # |
|
7 # This software is subject to the provisions of the Zope Public License, |
|
8 # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. |
|
9 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED |
|
10 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
11 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS |
|
12 # FOR A PARTICULAR PURPOSE. |
|
13 # |
|
14 ############################################################################## |
|
15 |
|
16 |
|
17 # import standard packages |
|
18 import codecs |
|
19 import string |
|
20 |
|
21 # import Zope3 interfaces |
|
22 |
|
23 # import local interfaces |
|
24 |
|
25 # import Zope3 packages |
|
26 |
|
27 # import local packages |
|
28 |
|
29 |
|
# Translation table usable with ``unicode.translate``: maps the code point of
# an accented or extended Latin character to its plain ASCII replacement.
_unicodeTransTable = {}

def _fillUnicodeTransTable():
    """Populate the module-level C{_unicodeTransTable} mapping

    Each entry of the correspondence list pairs an ASCII replacement string
    with the code points (all within U+00C0..U+017F) which must be replaced
    by it. Replacements may be longer than one character (e.g. C{u"AE"}).
    """
    _corresp = [
        (u"A", [0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x0100, 0x0102, 0x0104]),
        (u"AE", [0x00C6]),
        (u"a", [0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x0101, 0x0103, 0x0105]),
        (u"ae", [0x00E6]),
        (u"C", [0x00C7, 0x0106, 0x0108, 0x010A, 0x010C]),
        (u"c", [0x00E7, 0x0107, 0x0109, 0x010B, 0x010D]),
        (u"D", [0x00D0, 0x010E, 0x0110]),
        (u"d", [0x00F0, 0x010F, 0x0111]),
        (u"E", [0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0112, 0x0114, 0x0116, 0x0118, 0x011A]),
        (u"e", [0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0113, 0x0115, 0x0117, 0x0119, 0x011B]),
        (u"G", [0x011C, 0x011E, 0x0120, 0x0122]),
        (u"g", [0x011D, 0x011F, 0x0121, 0x0123]),
        (u"H", [0x0124, 0x0126]),
        (u"h", [0x0125, 0x0127]),
        (u"I", [0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0128, 0x012A, 0x012C, 0x012E, 0x0130]),
        (u"i", [0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0129, 0x012B, 0x012D, 0x012F, 0x0131]),
        (u"IJ", [0x0132]),
        (u"ij", [0x0133]),
        (u"J", [0x0134]),
        (u"j", [0x0135]),
        (u"K", [0x0136]),
        (u"k", [0x0137, 0x0138]),
        (u"L", [0x0139, 0x013B, 0x013D, 0x013F, 0x0141]),
        (u"l", [0x013A, 0x013C, 0x013E, 0x0140, 0x0142]),
        (u"N", [0x00D1, 0x0143, 0x0145, 0x0147, 0x014A]),
        (u"n", [0x00F1, 0x0144, 0x0146, 0x0148, 0x0149, 0x014B]),
        (u"O", [0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D8, 0x014C, 0x014E, 0x0150]),
        (u"o", [0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F8, 0x014D, 0x014F, 0x0151]),
        (u"OE", [0x0152]),
        (u"oe", [0x0153]),
        (u"R", [0x0154, 0x0156, 0x0158]),
        (u"r", [0x0155, 0x0157, 0x0159]),
        (u"S", [0x015A, 0x015C, 0x015E, 0x0160]),
        # 0x0161 (s with caron) was previously mistyped as 0x01610
        (u"s", [0x015B, 0x015D, 0x015F, 0x0161, 0x017F]),
        (u"T", [0x0162, 0x0164, 0x0166]),
        (u"t", [0x0163, 0x0165, 0x0167]),
        (u"U", [0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x016C, 0x016E, 0x0170, 0x0172]),
        # 0x0173 (u with ogonek) added to mirror 0x0172 in the upper case row
        (u"u", [0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x016D, 0x016F, 0x0171, 0x0173]),
        (u"W", [0x0174]),
        (u"w", [0x0175]),
        (u"Y", [0x00DD, 0x0176, 0x0178]),
        (u"y", [0x00FD, 0x00FF, 0x0177]),
        (u"Z", [0x0179, 0x017B, 0x017D]),
        (u"z", [0x017A, 0x017C, 0x017E]),
    ]
    for char, codes in _corresp:
        for code in codes:
            _unicodeTransTable[code] = char


# Fill the table once, at import time
_fillUnicodeTransTable()
|
83 |
|
84 |
|
def translateString(s, escapeSlashes=False, forceLower=True, spaces=' '):
    """Remove extended characters from string and replace them with 'basic' ones

    Characters found in C{_unicodeTransTable} are replaced by their plain
    equivalent; every remaining character which is not an ASCII letter, a
    digit, a space or one of C{_-.} is then dropped.

    @param s: text to be cleaned; a byte string is decoded as UTF-8, invalid
        sequences being replaced
    @type s: str or unicode
    @param escapeSlashes: if True, only the part of the text located after the
        last slash or backslash is kept
    @type escapeSlashes: boolean
    @param forceLower: if True, result is automatically converted to lower case
    @type forceLower: boolean
    @param spaces: string substituted for each space left in the result
    @type spaces: str or unicode
    @return: text without diacritics
    @rtype: unicode
    """
    # Decode first so that every following step works on a single text type
    # (``bytes`` is an alias of ``str`` on Python 2.6+)
    if isinstance(s, bytes):
        s = s.decode("utf8", "replace")
    if escapeSlashes:
        # Treat backslashes as slashes, then keep the last path component
        s = s.replace("\\", "/").split("/")[-1]
    s = s.strip()
    # Spaces are kept at this stage; they are handled by ``spaces`` below
    allowed = string.ascii_letters + string.digits + '_-. '
    s = u''.join([a for a in s.translate(_unicodeTransTable) if a in allowed])
    if forceLower:
        s = s.lower()
    if spaces != ' ':
        s = s.replace(' ', spaces)
    return s
|
109 |
|
110 |
|
def nvl(value, default=''):
    """Return given value, falling back to a default when it is empty

    @param value: value to be checked
    @param default: replacement returned when value evaluates to false
    @return: value if it evaluates to true, default otherwise
    """
    if value:
        return value
    return default
|
119 |
|
120 |
|
def uninvl(value, default=u''):
    """Get specified value converted to unicode, or default if value is empty

    A byte string is first decoded with the interpreter default encoding; if
    this fails with a Unicode error, it is decoded as latin1 instead.

    @param value: text to be checked
    @type value: str or unicode
    @param default: default value, used when value is empty
    @type default: str or unicode
    @return: value, or default if value is empty
    @rtype: unicode
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    result = value or default
    # Already unicode (value or default): nothing to decode
    if isinstance(result, text_type):
        return result
    try:
        return codecs.decode(result)
    except UnicodeError:
        # latin1 maps every byte to a code point, so this fallback cannot
        # fail on a byte string
        return codecs.decode(result, 'latin1')
|
136 |
|
137 |
|
def unidict(value):
    """Build a copy of given dict whose values are converted to unicode

    Keys are kept as they are; each value goes through C{uninvl}.

    @param value: input dict of strings which may be converted to unicode
    @type value: dict
    @return: new dict with the same keys and unicode values
    @rtype: dict
    """
    return dict((key, uninvl(value[key])) for key in value)
|
150 |
|
151 |
|
def unilist(value):
    """Convert items of given list to unicode

    Each item of a list or tuple goes through C{uninvl}; any other input is
    passed to C{uninvl} as a single value.

    @param value: input list of strings which may be converted to unicode
    @type value: list
    @return: list of converted items for a list or tuple, single converted
        value otherwise
    @rtype: list
    """
    if isinstance(value, (list, tuple)):
        return [uninvl(item) for item in value]
    return uninvl(value)
|
163 |
|
164 |
|
def utf8(value):
    """Convert given value to UTF-8

    @param value: value to be converted
    @return: UTF-8 encoded byte string if value is unicode text; any other
        value is returned unchanged
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3
    if isinstance(value, type(u'')):
        return value.encode('utf8')
    return value