--- a/src/pyams_utils/html.py Mon Jun 19 15:43:27 2017 +0200
+++ b/src/pyams_utils/html.py Mon Jun 19 15:45:39 2017 +0200
@@ -110,11 +110,19 @@
>>> html_to_text(html)
'This is a HTML text part.\\n'
+ >>> html = '''<p>This is text with french accents: <strong>é à è ù</strong></p>'''
+ >>> html_to_text(html)
+ 'This is text with french accents: é à è ù\\n'
+
HTML parser should handle entities correctly:
+ >>> html = '''<div><p>Header</p><p>This is an &lt; &ograve; &gt; entity.<br /></p></div>'''
+ >>> html_to_text(html)
+ 'Header\\nThis is an < ò > entity.\\n\\n'
+
>>> html = '''<div><p>Header</p><p>This is an &lt;&nbsp;&ograve;&nbsp;&gt; entity.<br /></p></div>'''
>>> html_to_text(html)
- 'Header\\nThis is an < o > entity.\\n\\n'
+ 'Header\\nThis is an <\xa0ò\xa0> entity.\\n\\n'
"""
if value is None:
return ''