src/pyams_utils/html.py
changeset 92 483b701c99db
parent 72 9049384a2bd4
child 292 b338586588ad
--- a/src/pyams_utils/html.py	Mon Jun 19 15:43:27 2017 +0200
+++ b/src/pyams_utils/html.py	Mon Jun 19 15:45:39 2017 +0200
@@ -110,11 +110,19 @@
     >>> html_to_text(html)
     'This is a HTML text part.\\n'
 
+    >>> html = '''<p>This is text with french accents: <strong>é à è ù</strong></p>'''
+    >>> html_to_text(html)
+    'This is text with french accents: é à è ù\\n'
+
     HTML parser should handle entities correctly:
 
+    >>> html = '''<div><p>Header</p><p>This is an &lt; &#242; &gt; entity.<br /></p></div>'''
+    >>> html_to_text(html)
+    'Header\\nThis is an < ò > entity.\\n\\n'
+
     >>> html = '''<div><p>Header</p><p>This is an &lt;&nbsp;&#242;&nbsp;&gt; entity.<br /></p></div>'''
     >>> html_to_text(html)
-    'Header\\nThis is an < o > entity.\\n\\n'
+    'Header\\nThis is an <\\xa0ò\\xa0> entity.\\n\\n'
     """
     if value is None:
         return ''